• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "DynamicQuantizationVisitor.hpp"
7 #include "NetworkUtils.hpp"
8 
9 #include <armnn/Descriptors.hpp>
10 #include <armnn/utility/IgnoreUnused.hpp>
11 #include <armnn/utility/PolymorphicDowncast.hpp>
12 #include <armnn/Types.hpp>
13 
14 #include <limits>
15 
16 namespace armnn
17 {
18 
DynamicQuantizationVisitor(RangeTracker & rangeTracker,Graph & graph)19 DynamicQuantizationVisitor::DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph)
20         : m_RangeTracker(rangeTracker),
21           m_Graph(graph)
22 {}
23 
SetRange(const IConnectableLayer * layer,unsigned int outputIdx,float min,float max)24 void DynamicQuantizationVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
25 {
26     m_RangeTracker.SetRange(layer, outputIdx, min, max);
27 }
28 
ForwardParentParameters(const IConnectableLayer * layer)29 void DynamicQuantizationVisitor::ForwardParentParameters(const IConnectableLayer* layer)
30 {
31     for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
32     {
33         const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection();
34         LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
35         unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
36         const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
37         SetRange(layer, i, parentRange.first, parentRange.second);
38     }
39 }
40 
AddToCalibratedLayers(const IConnectableLayer * layer)41 void DynamicQuantizationVisitor::AddToCalibratedLayers(const IConnectableLayer* layer)
42 {
43     m_LayersToCalibrate.push_back(layer);
44 }
45 
AddToNonCalibratedLayers(const IConnectableLayer * layer)46 void DynamicQuantizationVisitor::AddToNonCalibratedLayers(const IConnectableLayer* layer)
47 {
48     m_LayersNotToCalibrate.push_back(layer);
49 }
50 
FinishVisit()51 void DynamicQuantizationVisitor::FinishVisit()
52 {
53     for (const IConnectableLayer* layer : m_LayersToCalibrate)
54     {
55         std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
56             m_Graph, *PolymorphicDowncast<Layer*>(const_cast<IConnectableLayer*>(layer)));
57         // record them so we can take them out again efficiently afterward
58         m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
59     }
60 }
61 
RemoveDebugLayers()62 void DynamicQuantizationVisitor::RemoveDebugLayers()
63 {
64     for (DebugLayer* debugLayer : m_DebugLayers)
65     {
66         OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
67         proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));
68 
69         for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections())
70         {
71             debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot);
72             proceedingOutputSlot.Connect(*succeedingInputSlot);
73         }
74         m_Graph.EraseLayer(debugLayer);
75     }
76     m_DebugLayers.clear();
77 }
78 
VisitNonCalibratedLayers()79 void DynamicQuantizationVisitor::VisitNonCalibratedLayers() {
80     RemoveDebugLayers();
81     for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
82     {
83         ForwardParentParameters(layer);
84     }
85 }
86 
VisitAdditionLayer(const IConnectableLayer * layer,const char * name)87 void DynamicQuantizationVisitor::VisitAdditionLayer(const IConnectableLayer* layer,
88                                                     const char* name)
89 {
90     IgnoreUnused(name);
91     SetRange(layer, 0, -20.f, 20.f);
92     AddToCalibratedLayers(layer);
93 }
94 
VisitAbsLayer(const IConnectableLayer * layer,const char * name)95 void DynamicQuantizationVisitor::VisitAbsLayer(const IConnectableLayer* layer,
96                                                const char* name)
97 {
98     IgnoreUnused(name);
99     SetRange(layer, 0, -20.f, 20.f);
100     AddToCalibratedLayers(layer);
101 }
102 
VisitArgMinMaxLayer(const IConnectableLayer * layer,const ArgMinMaxDescriptor & desc,const char * name)103 void DynamicQuantizationVisitor::VisitArgMinMaxLayer(const IConnectableLayer* layer,
104                                                      const ArgMinMaxDescriptor& desc,
105                                                      const char* name)
106 {
107     IgnoreUnused(name);
108     IgnoreUnused(desc);
109     SetRange(layer, 0, -20.f, 20.f);
110     AddToCalibratedLayers(layer);
111 }
112 
VisitBatchNormalizationLayer(const IConnectableLayer * layer,const BatchNormalizationDescriptor & desc,const ConstTensor & mean,const ConstTensor & variance,const ConstTensor & beta,const ConstTensor & gamma,const char * name)113 void DynamicQuantizationVisitor::VisitBatchNormalizationLayer(const IConnectableLayer* layer,
114                                                               const BatchNormalizationDescriptor& desc,
115                                                               const ConstTensor& mean,
116                                                               const ConstTensor& variance,
117                                                               const ConstTensor& beta,
118                                                               const ConstTensor& gamma,
119                                                               const char* name)
120 {
121     IgnoreUnused(desc);
122     IgnoreUnused(mean);
123     IgnoreUnused(variance);
124     IgnoreUnused(beta);
125     IgnoreUnused(gamma);
126     IgnoreUnused(name);
127     SetRange(layer, 0, -15.0f, 15.0f);
128     AddToCalibratedLayers(layer);
129 }
130 
VisitNormalizationLayer(const IConnectableLayer * layer,const NormalizationDescriptor & desc,const char * name)131 void DynamicQuantizationVisitor::VisitNormalizationLayer(const IConnectableLayer* layer,
132                                  const NormalizationDescriptor& desc,
133                                  const char* name)
134 {
135     IgnoreUnused(desc);
136     IgnoreUnused(name);
137     SetRange(layer, 0, -15.0f, 15.0f);
138     AddToCalibratedLayers(layer);
139 }
140 
VisitConvolution2dLayer(const IConnectableLayer * layer,const Convolution2dDescriptor & convolution2dDescriptor,const ConstTensor & weights,const Optional<ConstTensor> & biases,const char * name)141 void DynamicQuantizationVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer,
142                                                          const Convolution2dDescriptor& convolution2dDescriptor,
143                                                          const ConstTensor& weights,
144                                                          const Optional<ConstTensor>& biases,
145                                                          const char* name)
146 {
147     IgnoreUnused(convolution2dDescriptor);
148     IgnoreUnused(weights);
149     IgnoreUnused(biases);
150     IgnoreUnused(name);
151     SetRange(layer, 0, -15.0f, 15.0f);
152     AddToCalibratedLayers(layer);
153 }
154 
VisitDepthwiseConvolution2dLayer(const IConnectableLayer * layer,const DepthwiseConvolution2dDescriptor & desc,const ConstTensor & weights,const Optional<ConstTensor> & biases,const char * name)155 void DynamicQuantizationVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
156                                                                   const DepthwiseConvolution2dDescriptor& desc,
157                                                                   const ConstTensor& weights,
158                                                                   const Optional<ConstTensor>& biases,
159                                                                   const char* name)
160 {
161     IgnoreUnused(desc);
162     IgnoreUnused(weights);
163     IgnoreUnused(biases);
164     IgnoreUnused(name);
165     SetRange(layer, 0, -15.0f, 15.0f);
166     AddToCalibratedLayers(layer);
167 }
168 
VisitActivationLayer(const IConnectableLayer * layer,const ActivationDescriptor & activationDescriptor,const char * name)169 void DynamicQuantizationVisitor::VisitActivationLayer(const IConnectableLayer* layer,
170                                                       const ActivationDescriptor& activationDescriptor,
171                                                       const char* name)
172 {
173     IgnoreUnused(name, activationDescriptor);
174     switch (activationDescriptor.m_Function)
175     {
176         // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
177         case ActivationFunction::Abs:
178         case ActivationFunction::Linear:
179         case ActivationFunction::ReLu:
180         case ActivationFunction::SoftReLu:
181             SetRange(layer, 0, 0.f, 15.f);
182             break;
183         case ActivationFunction::BoundedReLu:
184             SetRange(layer, 0, 0.f, activationDescriptor.m_A);
185             break;
186         case ActivationFunction::TanH:
187             SetRange(layer, 0, -1.f, 1.f);
188             break;
189         case ActivationFunction::LeakyReLu:
190             SetRange(layer, 0, -5.f, 15.f);
191             break;
192         default:
193             SetRange(layer, 0, -15.f, 15.f);
194             break;
195     }
196     AddToCalibratedLayers(layer);
197 }
198 
VisitFullyConnectedLayer(const IConnectableLayer * layer,const FullyConnectedDescriptor & desc,const ConstTensor & weights,const Optional<ConstTensor> & biases,const char * name)199 void DynamicQuantizationVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer,
200                                                           const FullyConnectedDescriptor& desc,
201                                                           const ConstTensor& weights,
202                                                           const Optional<ConstTensor>& biases,
203                                                           const char *name)
204 {
205     IgnoreUnused(desc);
206     IgnoreUnused(weights);
207     IgnoreUnused(biases);
208     IgnoreUnused(name);
209     SetRange(layer, 0, -15.0f, 15.0f);
210     AddToCalibratedLayers(layer);
211 }
212 
VisitPermuteLayer(const IConnectableLayer * layer,const PermuteDescriptor & permuteDescriptor,const char * name)213 void DynamicQuantizationVisitor::VisitPermuteLayer(const IConnectableLayer* layer,
214                                                    const PermuteDescriptor& permuteDescriptor,
215                                                    const char* name)
216 {
217     IgnoreUnused(permuteDescriptor);
218     IgnoreUnused(name);
219     AddToNonCalibratedLayers(layer);
220 }
221 
VisitSpaceToBatchNdLayer(const IConnectableLayer * layer,const SpaceToBatchNdDescriptor & spaceToBatchNdDescriptor,const char * name)222 void DynamicQuantizationVisitor::VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
223                                                           const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
224                                                           const char* name)
225 {
226     IgnoreUnused(spaceToBatchNdDescriptor);
227     IgnoreUnused(name);
228     AddToNonCalibratedLayers(layer);
229 }
230 
VisitPooling2dLayer(const IConnectableLayer * layer,const Pooling2dDescriptor & pooling2dDescriptor,const char * name)231 void DynamicQuantizationVisitor::VisitPooling2dLayer(const IConnectableLayer* layer,
232                                                      const Pooling2dDescriptor& pooling2dDescriptor,
233                                                      const char* name)
234 {
235     IgnoreUnused(pooling2dDescriptor);
236     IgnoreUnused(name);
237     AddToNonCalibratedLayers(layer);
238 }
239 
VisitSoftmaxLayer(const IConnectableLayer * layer,const SoftmaxDescriptor & softmaxDescriptor,const char * name)240 void DynamicQuantizationVisitor::VisitSoftmaxLayer(const IConnectableLayer* layer,
241                                                    const SoftmaxDescriptor& softmaxDescriptor,
242                                                    const char* name)
243 {
244     IgnoreUnused(softmaxDescriptor);
245     IgnoreUnused(name);
246     SetRange(layer, 0, 0.f, 1.f);
247     AddToCalibratedLayers(layer);
248 }
249 
VisitConstantLayer(const IConnectableLayer * layer,const ConstTensor & input,const char * name)250 void DynamicQuantizationVisitor::VisitConstantLayer(const IConnectableLayer* layer,
251                                                     const ConstTensor& input,
252                                                     const char* name)
253 {
254     IgnoreUnused(name);
255 
256     if (input.GetDataType() != DataType::Float32)
257     {
258         throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
259     }
260 
261     // Work out the range based on the input constants
262     unsigned int inputNumElements = input.GetNumElements();
263     const float* inputData = reinterpret_cast<const float*>(input.GetMemoryArea());
264 
265     float min = std::numeric_limits<float>::max();
266     float max = std::numeric_limits<float>::lowest();
267 
268     for (unsigned int i = 0; i < inputNumElements; i++)
269     {
270         const float inputValue = inputData[i];
271 
272         min = std::min(min, inputValue);
273         max = std::max(max, inputValue);
274     }
275     SetRange(layer, 0, min, max);
276 }
277 
VisitConcatLayer(const IConnectableLayer * layer,const ConcatDescriptor & originsDescriptor,const char * name)278 void DynamicQuantizationVisitor::VisitConcatLayer(const IConnectableLayer* layer,
279                                                   const ConcatDescriptor& originsDescriptor,
280                                                   const char* name)
281 {
282     IgnoreUnused(name);
283     IgnoreUnused(originsDescriptor);
284     float min = std::numeric_limits<float>::max();
285     float max = std::numeric_limits<float>::lowest();
286     for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
287     {
288         const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
289         LayerGuid layerId = outputSlot->GetOwningLayerGuid();
290         unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
291         RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
292         min = std::min(min, range.first);
293         max = std::max(max, range.second);
294     }
295     SetRange(layer, 0, min, max);
296     AddToCalibratedLayers(layer);
297 }
298 
VisitReshapeLayer(const IConnectableLayer * layer,const ReshapeDescriptor & reshapeDescriptor,const char * name)299 void DynamicQuantizationVisitor::VisitReshapeLayer(const IConnectableLayer* layer,
300                                                    const ReshapeDescriptor& reshapeDescriptor,
301                                                    const char* name)
302 {
303     IgnoreUnused(reshapeDescriptor);
304     IgnoreUnused(name);
305     AddToNonCalibratedLayers(layer);
306 }
307 
VisitSplitterLayer(const IConnectableLayer * layer,const SplitterDescriptor & splitterDescriptor,const char * name)308 void DynamicQuantizationVisitor::VisitSplitterLayer(const IConnectableLayer* layer,
309                                                     const SplitterDescriptor& splitterDescriptor,
310                                                     const char* name)
311 {
312     IgnoreUnused(splitterDescriptor);
313     IgnoreUnused(name);
314     AddToNonCalibratedLayers(layer);
315 }
316 
VisitResizeBilinearLayer(const IConnectableLayer * layer,const ResizeBilinearDescriptor & resizeDesc,const char * name)317 void DynamicQuantizationVisitor::VisitResizeBilinearLayer(const IConnectableLayer* layer,
318                                                           const ResizeBilinearDescriptor& resizeDesc,
319                                                           const char* name)
320 {
321     IgnoreUnused(resizeDesc);
322     IgnoreUnused(name);
323     AddToNonCalibratedLayers(layer);
324 }
325 
VisitStridedSliceLayer(const IConnectableLayer * layer,const StridedSliceDescriptor & stridedSliceDescriptor,const char * name)326 void DynamicQuantizationVisitor::VisitStridedSliceLayer(const IConnectableLayer* layer,
327                                                         const StridedSliceDescriptor& stridedSliceDescriptor,
328                                                         const char* name)
329 {
330     IgnoreUnused(stridedSliceDescriptor);
331     IgnoreUnused(name);
332     AddToNonCalibratedLayers(layer);
333 }
334 
VisitBatchToSpaceNdLayer(const IConnectableLayer * layer,const BatchToSpaceNdDescriptor & batchToSpaceNdDescriptor,const char * name)335 void DynamicQuantizationVisitor::VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
336                                                           const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
337                                                           const char* name)
338 {
339     IgnoreUnused(batchToSpaceNdDescriptor);
340     IgnoreUnused(name);
341     AddToNonCalibratedLayers(layer);
342 }
343 
VisitInputLayer(const IConnectableLayer * layer,LayerBindingId id,const char * name)344 void DynamicQuantizationVisitor::VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
345 {
346     IgnoreUnused(id);
347     IgnoreUnused(name);
348     SetRange(layer, 0, -0.0f, 0.0f);
349     AddToCalibratedLayers(layer);
350 }
351 
VisitOutputLayer(const IConnectableLayer * layer,LayerBindingId id,const char * name)352 void DynamicQuantizationVisitor::VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
353 {
354     IgnoreUnused(id);
355     IgnoreUnused(name);
356     AddToNonCalibratedLayers(layer);
357     m_OutputLayers.push_back(id);
358 }
359 
GetOutputLayers()360 const std::vector<LayerBindingId>& DynamicQuantizationVisitor::GetOutputLayers()
361 {
362     return m_OutputLayers;
363 }
364 
365 } //namespace armnn
366