//
// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Optimization.hpp"

#include <armnnUtils/QuantizeHelper.hpp>

#include <armnn/utility/Assert.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>
#include <armnnUtils/DataLayoutIndexed.hpp>

#include <cmath>
#include <limits>
#include <string>
#include <tuple>
#include <utility>

namespace armnn
{
namespace optimizations
{
namespace pad_fold
{
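// Returns the value that represents zero for the given tensor: the quantization
// offset for quantized types, 0 otherwise.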
inline float GetZeroElement(const TensorInfo& tensorInfo)
{
    return static_cast<float>(tensorInfo.IsQuantized() ? tensorInfo.GetQuantizationOffset() : 0);
}

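// Returns the lowest element value representable for the tensor's data type:
// negative infinity for floating-point types, the quantized minimum otherwise.
// Used as the identity element when checking neutrality for max pooling.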
inline float GetLowestElement(const TensorInfo& tensorInfo)
{
    constexpr float negativeInfinity = -std::numeric_limits<float>::infinity();
    const float scale = tensorInfo.GetQuantizationScale();
    const int32_t offset = tensorInfo.GetQuantizationOffset();

    switch (tensorInfo.GetDataType())
    {
        case DataType::Float16:
            return armnnUtils::SelectiveQuantize<armnn::Half>(negativeInfinity, scale, offset);
        case DataType::Float32:
            return armnnUtils::SelectiveQuantize<float>(negativeInfinity, scale, offset);
        case DataType::QAsymmU8:
            return armnnUtils::SelectiveQuantize<uint8_t>(negativeInfinity, scale, offset);
        case DataType::QSymmS16:
            return armnnUtils::SelectiveQuantize<int16_t>(negativeInfinity, scale, offset);
        case DataType::QSymmS8:
            // Fall-through
        case DataType::QAsymmS8:
            return armnnUtils::SelectiveQuantize<int8_t>(negativeInfinity, scale, offset);
        case DataType::BFloat16:
            return armnnUtils::SelectiveQuantize<armnn::BFloat16>(negativeInfinity, scale, offset);
        default:
        {
            ARMNN_ASSERT_MSG(false, "Unsupported DataType");
            return NAN;
        }
    }
}

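// A pad value is "neutral" if folding it into the following layer leaves the
// result unchanged: the zero element for convolutions (and for non-max pooling),
// or anything at or below the lowest representable value for max pooling.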
inline bool IsNeutralElement(const Convolution2dDescriptor&, const TensorInfo& tensorInfo, const float tensorValue)
{
    return tensorValue == GetZeroElement(tensorInfo);
}

inline bool IsNeutralElement(const DepthwiseConvolution2dDescriptor&,
                             const TensorInfo& tensorInfo,
                             const float tensorValue)
{
    return tensorValue == GetZeroElement(tensorInfo);
}

inline bool IsNeutralElement(
    const Pooling2dDescriptor& descriptor, const TensorInfo& tensorInfo, const float tensorValue)
{
    return (descriptor.m_PoolType == PoolingAlgorithm::Max)
        ? tensorValue <= GetLowestElement(tensorInfo)
        : tensorValue == GetZeroElement(tensorInfo);
}

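// Returns true if the pooling descriptor specifies any implicit padding of its own.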
inline bool IsPooling2dPadded(const Pooling2dDescriptor& poolDescriptor)
{
    const auto poolingPadValues = std::make_tuple(poolDescriptor.m_PadLeft, poolDescriptor.m_PadRight,
                                                  poolDescriptor.m_PadTop, poolDescriptor.m_PadBottom);
    return poolingPadValues != std::make_tuple(0U, 0U, 0U, 0U);
}

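// Attempts to fold the padding described by padDescriptor into layerDescriptor.
// Folding is only legal when the pad value is neutral for the layer and no
// padding is requested on the batch or channel dimensions. On success, the
// layer's spatial padding fields are updated and true is returned.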
template <typename Descriptor>
bool TryFoldPadIntoLayer2d(
    const PadDescriptor& padDescriptor, Descriptor& layerDescriptor, const TensorInfo& tensorInfo)
{
    armnnUtils::DataLayoutIndexed layout = armnnUtils::DataLayoutIndexed(layerDescriptor.m_DataLayout);
    constexpr unsigned int batchIndex = 0;

    constexpr auto noPad = std::make_pair(0U, 0U);

    if ((!IsNeutralElement(layerDescriptor, tensorInfo, padDescriptor.m_PadValue)) ||
        (padDescriptor.m_PadList[batchIndex] != noPad) || (padDescriptor.m_PadList[layout.GetChannelsIndex()] != noPad))
    {
        return false;
    }

    const auto& padList = padDescriptor.m_PadList;

    // In Convolution2dDescriptor/Pooling2dDescriptor, padLeft and padRight are paddings on the width
    // dimension, while padTop and padBottom are paddings on the height dimension, so update them
    // according to the data layout.
    layerDescriptor.m_PadLeft += padList[layout.GetWidthIndex()].first;
    layerDescriptor.m_PadRight += padList[layout.GetWidthIndex()].second;
    layerDescriptor.m_PadTop += padList[layout.GetHeightIndex()].first;
    layerDescriptor.m_PadBottom += padList[layout.GetHeightIndex()].second;

    return true;
}

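// Pooling2d-specific overload. Rejects the cases where folding would change the
// result (Exclude-padded Average/L2 pooling, and quantized NHWC Average pooling
// outside of backend-specific optimizations), then switches the padding method
// to IgnoreValue and delegates to the generic implementation above.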
inline bool TryFoldPadIntoLayer2d(const PadDescriptor& padDescriptor,
                                  Pooling2dDescriptor& poolDescriptor,
                                  const TensorInfo& tensorInfo,
                                  bool isBackendOptimization = false)
{
    // Cannot fold Average or L2 pooling if padding exists and the padding method is Exclude.
    if (poolDescriptor.m_PoolType != PoolingAlgorithm::Max &&
        IsPooling2dPadded(poolDescriptor) &&
        poolDescriptor.m_PaddingMethod == PaddingMethod::Exclude)
    {
        return false;
    }

    // Cannot fold Average pooling in the Neon backend if the data type is quantized and the
    // layout is NHWC; that case is handled as a backend-specific optimization instead.
    if (!isBackendOptimization &&
        tensorInfo.IsQuantized() &&
        poolDescriptor.m_PoolType == PoolingAlgorithm::Average &&
        poolDescriptor.m_DataLayout == DataLayout::NHWC)
    {
        return false;
    }

    poolDescriptor.m_PaddingMethod = PaddingMethod::IgnoreValue;

    return TryFoldPadIntoLayer2d<Pooling2dDescriptor>(padDescriptor, poolDescriptor, tensorInfo);
}

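// Replaces a Pad layer followed by a 2D layer (convolution, depthwise convolution
// or pooling) with a single 2D layer whose descriptor absorbs the padding.
// Returns the new layer, or nullptr if the fold was not possible.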
template <typename Layer2dT>
Layer2dT* FoldPadIntoLayer2dImpl(Graph& graph, InputSlot& connection)
{
    PadLayer& padLayer = *PolymorphicDowncast<PadLayer*>(&connection.GetConnectedOutputSlot()->GetOwningLayer());
    Layer2dT& layer2d = *PolymorphicDowncast<Layer2dT*>(&connection.GetOwningLayer());

    const PadDescriptor& padDescriptor = padLayer.GetParameters();
    auto newLayer2dDescriptor = layer2d.GetParameters();

    if (!TryFoldPadIntoLayer2d(padDescriptor, newLayer2dDescriptor, padLayer.GetOutputSlot().GetTensorInfo()))
    {
        return nullptr;
    }

    // Save the original parent output slot of the pad layer.
    OutputSlot& parentSlot = *padLayer.GetInputSlot(0).GetConnectedOutputSlot();

    // Insert the new layer2d layer between the pad layer and its parent layer.
    const std::string name = std::string("folded-") + padLayer.GetName() + "-into-" + layer2d.GetName();
    auto& newLayer2d = *graph.InsertNewLayer<Layer2dT>(padLayer.GetInputSlot(0), newLayer2dDescriptor, name.c_str());

    newLayer2d.GetOutputSlot().MoveAllConnections(parentSlot);
    // Start at 1 to reconnect only the weights and bias inputs.
    for (unsigned int i = 1; i < layer2d.GetNumInputSlots(); ++i)
    {
        if (layer2d.GetInputSlot(i).GetConnectedOutputSlot() != nullptr)
        {
            Layer& tgtLayer = layer2d.GetInputSlot(i).GetConnectedOutputSlot()->GetOwningLayer();
            // Remove the old connection and connect to the new layer2d layer.
            tgtLayer.GetOutputSlot(0).Disconnect(layer2d.GetInputSlot(i));
            tgtLayer.GetOutputSlot(0).Connect(newLayer2d.GetInputSlot(i));
        }
    }

    // Move the connections on the old layer2d layer's output to the new layer.
    // The old layer2d layer will be removed as it is left unconnected.
    // The pad layer will be removed if it is left unconnected.
    layer2d.GetOutputSlot().MoveAllConnections(newLayer2d.GetOutputSlot());

    return &newLayer2d;
}

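// Optimization wrapper that folds a Pad layer into a following Convolution2d
// layer, verifying that the weights (and bias, if enabled) connections were
// carried over to the new layer.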
class FoldPadIntoConvolution2dImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        const auto newConv2dLayer = FoldPadIntoLayer2dImpl<Convolution2dLayer>(graph, connection);

        if (newConv2dLayer != nullptr)
        {
            const auto conv2dLayer = PolymorphicDowncast<Convolution2dLayer*>(&connection.GetOwningLayer());
            ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr,
                             "FoldPadIntoConvolution2d: New convolution layer is missing "
                             "connection to weights layer.");

            if (conv2dLayer->GetParameters().m_BiasEnabled)
            {
                ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr,
                                 "FoldPadIntoConvolution2d: New convolution layer is missing "
                                 "connection to bias layer.");
            }
        }
    }

protected:
    FoldPadIntoConvolution2dImpl() = default;
    ~FoldPadIntoConvolution2dImpl() = default;
};

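// As above, but for DepthwiseConvolution2d.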
class FoldPadIntoDepthwiseConvolution2dImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        const auto newConv2dLayer = FoldPadIntoLayer2dImpl<DepthwiseConvolution2dLayer>(graph, connection);

        if (newConv2dLayer != nullptr)
        {
            const auto conv2dLayer = PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&connection.GetOwningLayer());
            ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(1).GetConnection() != nullptr,
                             "FoldPadIntoDepthwiseConvolution2d: New convolution layer is missing "
                             "connection to weights layer.");

            if (conv2dLayer->GetParameters().m_BiasEnabled)
            {
                ARMNN_ASSERT_MSG(newConv2dLayer->GetInputSlot(2).GetConnection() != nullptr,
                                 "FoldPadIntoDepthwiseConvolution2d: New convolution layer is missing "
                                 "connection to bias layer.");
            }
        }
    }

protected:
    FoldPadIntoDepthwiseConvolution2dImpl() = default;
    ~FoldPadIntoDepthwiseConvolution2dImpl() = default;
};

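// Optimization wrapper that folds a Pad layer into a following Pooling2d layer.
// No connection checks are needed here, as pooling has no weights or bias inputs.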
class FoldPadIntoPooling2dImpl
{
public:
    void Run(Graph& graph, InputSlot& connection) const
    {
        FoldPadIntoLayer2dImpl<Pooling2dLayer>(graph, connection);
    }

protected:
    FoldPadIntoPooling2dImpl() = default;
    ~FoldPadIntoPooling2dImpl() = default;
};
} // namespace pad_fold

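// Each of these optimizations fires only when the Pad layer's output feeds
// exclusively into the matching 2D layer. A minimal usage sketch, assuming the
// Optimizer::Pass / MakeOptimizations pattern used elsewhere in Arm NN:
//
//   Optimizer::Pass(graph, MakeOptimizations(FoldPadIntoConvolution2d(),
//                                            FoldPadIntoDepthwiseConvolution2d(),
//                                            FoldPadIntoPooling2d()));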
using FoldPadIntoConvolution2d =
    OptimizeForExclusiveConnection<PadLayer, Convolution2dLayer, pad_fold::FoldPadIntoConvolution2dImpl>;
using FoldPadIntoDepthwiseConvolution2d =
    OptimizeForExclusiveConnection<PadLayer,
                                   DepthwiseConvolution2dLayer,
                                   pad_fold::FoldPadIntoDepthwiseConvolution2dImpl>;
using FoldPadIntoPooling2d =
    OptimizeForExclusiveConnection<PadLayer, Pooling2dLayer, pad_fold::FoldPadIntoPooling2dImpl>;

} // namespace optimizations
} // namespace armnn