• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ClBackend.hpp"
7 #include "ClBackendId.hpp"
8 #include "ClBackendModelContext.hpp"
9 #include "ClWorkloadFactory.hpp"
10 #include "ClBackendContext.hpp"
11 #include "ClLayerSupport.hpp"
12 #include "ClTensorHandleFactory.hpp"
13 
14 #include <armnn/BackendRegistry.hpp>
15 #include <armnn/Descriptors.hpp>
16 
17 #include <aclCommon/ArmComputeSubgraphUtils.hpp>
18 #include <aclCommon/ArmComputeUtils.hpp>
19 #include <aclCommon/BaseMemoryManager.hpp>
20 
21 #include <armnn/backends/IBackendContext.hpp>
22 #include <armnn/backends/IMemoryManager.hpp>
23 #include <armnn/utility/PolymorphicDowncast.hpp>
24 
25 #include "workloads/ClAdditionWorkload.hpp"
26 #include "workloads/ClBatchNormalizationFloatWorkload.hpp"
27 #include "workloads/ClConvolution2dWorkload.hpp"
28 #include "workloads/ClDepthwiseConvolutionWorkload.hpp"
29 #include "workloads/ClDivisionFloatWorkload.hpp"
30 #include "workloads/ClFullyConnectedWorkload.hpp"
31 #include "workloads/ClMultiplicationWorkload.hpp"
32 #include "workloads/ClSubtractionWorkload.hpp"
33 
34 #include <Optimizer.hpp>
35 
36 #include <arm_compute/core/Types.h>
37 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
38 
39 namespace armnn
40 {
41 
GetIdStatic()42 const BackendId& ClBackend::GetIdStatic()
43 {
44     static const BackendId s_Id{ClBackendId()};
45     return s_Id;
46 }
47 
CreateMemoryManager() const48 IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
49 {
50     return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
51 }
52 
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    // Recover the concrete ClMemoryManager from the abstract shared pointer.
    auto clMemoryManager = PolymorphicPointerDowncast<ClMemoryManager>(memoryManager);
    return std::make_unique<ClWorkloadFactory>(clMemoryManager);
}
59 
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    // As the single-argument overload, but also hands the factory a
    // backend-specific model context built from the caller's model options.
    auto clMemoryManager = PolymorphicPointerDowncast<ClMemoryManager>(memoryManager);
    auto modelContext    = CreateBackendSpecificModelContext(modelOptions);
    return std::make_unique<ClWorkloadFactory>(clMemoryManager, modelContext);
}
66 
CreateWorkloadFactory(TensorHandleFactoryRegistry & registry) const67 IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
68     TensorHandleFactoryRegistry& registry) const
69 {
70     auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
71 
72     registry.RegisterMemoryManager(memoryManager);
73     registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
74 
75     return std::make_unique<ClWorkloadFactory>(
76             PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
77 }
78 
CreateWorkloadFactory(TensorHandleFactoryRegistry & registry,const ModelOptions & modelOptions) const79 IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
80     TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
81 {
82     auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
83 
84     registry.RegisterMemoryManager(memoryManager);
85     registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
86 
87     return std::make_unique<ClWorkloadFactory>(
88         PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
89 }
90 
GetHandleFactoryPreferences() const91 std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
92 {
93     return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()};
94 }
95 
RegisterTensorHandleFactories(TensorHandleFactoryRegistry & registry)96 void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
97 {
98     auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
99 
100     registry.RegisterMemoryManager(mgr);
101     registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
102 }
103 
CreateBackendContext(const IRuntime::CreationOptions & options) const104 IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
105 {
106     return IBackendContextPtr{new ClBackendContext{options}};
107 }
108 
CreateBackendProfilingContext(const IRuntime::CreationOptions &,IBackendProfilingPtr &)109 IBackendInternal::IBackendProfilingContextPtr ClBackend::CreateBackendProfilingContext(
110     const IRuntime::CreationOptions&, IBackendProfilingPtr&)
111 {
112     return IBackendProfilingContextPtr{};
113 }
114 
GetOptimizations() const115 IBackendInternal::Optimizations ClBackend::GetOptimizations() const
116 {
117     return Optimizations{};
118 }
119 
CreateBackendSpecificModelContext(const ModelOptions & modelOptions) const120 IBackendInternal::IBackendSpecificModelContextPtr ClBackend::CreateBackendSpecificModelContext(
121     const ModelOptions& modelOptions) const
122 {
123     return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
124 }
125 
GetLayerSupport() const126 IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport() const
127 {
128     static ILayerSupportSharedPtr layerSupport
129         {
130             new ClLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
131         };
132     return layerSupport;
133 }
134 
GetLayerSupport(const ModelOptions & modelOptions) const135 IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelOptions& modelOptions) const
136 {
137     static ILayerSupportSharedPtr layerSupport
138     {
139         new ClLayerSupport(CreateBackendSpecificModelContext(modelOptions))
140     };
141     return layerSupport;
142 }
143 
OptimizeSubgraphView(const SubgraphView & subgraph,const ModelOptions & modelOptions) const144 OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
145                                                   const ModelOptions& modelOptions) const
146 {
147     OptimizationViews optimizationViews;
148 
149     auto it = subgraph.end();
150     bool isFastMathEnabled = false;
151     std::map<LayerGuid, Layer*> untouched;
152 
153     while (it != subgraph.begin())
154     {
155         --it;
156         Layer& base = **it;
157         untouched.insert({base.GetGuid(), &base});
158     }
159 
160     it = subgraph.end();
161 #if defined(ARMCOMPUTECL_ENABLED)
162     IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);
163 
164     if (modelContextPtr)
165     {
166         auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
167         if (clModelOptions)
168         {
169             isFastMathEnabled = clModelOptions->IsFastMathEnabled();
170         }
171     }
172 #endif
173     while (it != subgraph.begin())
174     {
175         --it;
176         Layer& base = **it;
177 
178         if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
179             || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
180             || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
181             || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
182             && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
183         {
184             for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
185             {
186                 if (output->GetNumConnections() == 1)
187                 {
188                     for (auto&& childInput : output->GetConnections())
189                     {
190                         if (childInput->GetOwningLayer().GetType() == LayerType::Activation)
191                         {
192                             Layer& child = childInput->GetOwningLayer();
193 
194                             auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
195 
196                             const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
197                                                      base.GetName();
198 
199                             // Get params from activation layer
200                             ActivationDescriptor activationDesc = activationLayer->GetParameters();
201 
202                             if (base.GetType() == LayerType::Convolution2d)
203                             {
204                                 Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
205 
206                                 Optional<TensorInfo> biases;
207 
208                                 if (baseLayer->GetParameters().m_BiasEnabled)
209                                 {
210                                     biases = baseLayer->m_Bias->GetTensorInfo();
211                                 }
212 
213                                 arm_compute::Status status = ClConvolution2dWorkloadValidate(
214                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
215                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
216                                         baseLayer->GetParameters(),
217                                         baseLayer->m_Weight->GetTensorInfo(),
218                                         biases,
219                                         isFastMathEnabled,
220                                         &activationDesc);
221 
222                                 if (status)
223                                 {
224                                     FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
225                                                                                       baseLayer,
226                                                                                       activationLayer,
227                                                                                       activationDesc,
228                                                                                       name);
229                                     untouched.erase(baseLayer->GetGuid());
230                                     untouched.erase(activationLayer->GetGuid());
231                                 }
232                             }
233                             else if (base.GetType() == LayerType::DepthwiseConvolution2d)
234                             {
235                                 DepthwiseConvolution2dLayer* baseLayer =
236                                         PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
237 
238                                 Optional<TensorInfo> biases;
239 
240                                 if (baseLayer->GetParameters().m_BiasEnabled)
241                                 {
242                                     biases = baseLayer->m_Bias->GetTensorInfo();
243                                 }
244 
245                                 arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
246                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
247                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
248                                         baseLayer->GetParameters(),
249                                         baseLayer->m_Weight->GetTensorInfo(),
250                                         biases,
251                                         &activationDesc);
252 
253                                 if (status)
254                                 {
255                                     FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
256                                                                                                baseLayer,
257                                                                                                activationLayer,
258                                                                                                activationDesc,
259                                                                                                name);
260                                     untouched.erase(baseLayer->GetGuid());
261                                     untouched.erase(activationLayer->GetGuid());
262                                 }
263                             }
264                             else if (base.GetType() == LayerType::FullyConnected)
265                             {
266                                 FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
267 
268                                 arm_compute::Status status = ClFullyConnectedWorkloadValidate(
269                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
270                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
271                                         baseLayer->m_Weight->GetTensorInfo(),
272                                         baseLayer->m_Bias->GetTensorInfo(),
273                                         baseLayer->GetParameters(),
274                                         &activationDesc);
275 
276                                 if (status)
277                                 {
278                                     FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
279                                                                                        baseLayer,
280                                                                                        activationLayer,
281                                                                                        activationDesc,
282                                                                                        name);
283                                     untouched.erase(baseLayer->GetGuid());
284                                     untouched.erase(activationLayer->GetGuid());
285                                 }
286                             }
287                             else if (base.GetType() == LayerType::BatchNormalization)
288                             {
289                                 BatchNormalizationLayer* baseLayer =
290                                         PolymorphicDowncast<BatchNormalizationLayer*>(&base);
291 
292                                 arm_compute::Status status = ClBatchNormalizationValidate(
293                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
294                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
295                                         baseLayer->m_Mean->GetTensorInfo(),
296                                         baseLayer->m_Variance->GetTensorInfo(),
297                                         baseLayer->m_Beta->GetTensorInfo(),
298                                         baseLayer->m_Gamma->GetTensorInfo(),
299                                         baseLayer->GetParameters(),
300                                         &activationDesc);
301 
302                                 if (status)
303                                 {
304                                     BatchNormalizationLayer* replacementLayer =
305                                             FuseLayerWithParameters<BatchNormalizationLayer>(optimizationViews,
306                                                                                              baseLayer,
307                                                                                              activationLayer,
308                                                                                              activationDesc,
309                                                                                              name);
310 
311                                     replacementLayer->m_Beta     = std::move(baseLayer->m_Beta);
312                                     replacementLayer->m_Gamma    = std::move(baseLayer->m_Gamma);
313                                     replacementLayer->m_Mean     = std::move(baseLayer->m_Mean);
314                                     replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
315                                     untouched.erase(baseLayer->GetGuid());
316                                     untouched.erase(activationLayer->GetGuid());
317                                 }
318                             }
319                             else if (base.GetType() == LayerType::Addition)
320                             {
321                                 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
322 
323                                 arm_compute::Status status = ClAdditionValidate(
324                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
325                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
326                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
327                                         &activationDesc);
328 
329                                 if (status)
330                                 {
331                                     FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
332                                                                               baseLayer,
333                                                                               activationLayer,
334                                                                               activationDesc,
335                                                                               name);
336                                     untouched.erase(baseLayer->GetGuid());
337                                     untouched.erase(activationLayer->GetGuid());
338                                 }
339                             }
340                             else if (base.GetType() == LayerType::Division)
341                             {
342                                 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
343 
344                                 arm_compute::Status status = ClDivisionWorkloadValidate(
345                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
346                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
347                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
348                                         &activationDesc);
349 
350                                 if (status)
351                                 {
352                                     FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
353                                                                               baseLayer,
354                                                                               activationLayer,
355                                                                               activationDesc,
356                                                                               name);
357                                     untouched.erase(baseLayer->GetGuid());
358                                     untouched.erase(activationLayer->GetGuid());
359                                 }
360                             }
361                             else if (base.GetType() == LayerType::Multiplication)
362                             {
363                                 MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
364 
365                                 arm_compute::Status status = ClMultiplicationWorkloadValidate(
366                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
367                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
368                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
369                                         &activationDesc);
370 
371                                 if (status)
372                                 {
373                                     FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
374                                                                                     baseLayer,
375                                                                                     activationLayer,
376                                                                                     activationDesc,
377                                                                                     name);
378                                     untouched.erase(baseLayer->GetGuid());
379                                     untouched.erase(activationLayer->GetGuid());
380                                 }
381                             }
382                             else if (base.GetType() == LayerType::Subtraction)
383                             {
384                                 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
385 
386                                 arm_compute::Status status = ClSubtractionValidate(
387                                         baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
388                                         baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
389                                         activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
390                                         &activationDesc);
391 
392                                 if (status)
393                                 {
394                                     FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
395                                                                                  baseLayer,
396                                                                                  activationLayer,
397                                                                                  activationDesc,
398                                                                                  name);
399                                     untouched.erase(baseLayer->GetGuid());
400                                     untouched.erase(activationLayer->GetGuid());
401                                 }
402                             }
403                         }
404                     }
405                 }
406             }
407         }
408     }
409 
410     if (optimizationViews.GetSubstitutions().empty())
411     {
412         optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
413     }
414     else
415     {
416         ReportUntouchedLayers(optimizationViews, untouched);
417     }
418 
419     return optimizationViews;
420 }
421 
422 } // namespace armnn
423