1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "DynamicQuantizationVisitor.hpp"
7 #include "NetworkUtils.hpp"
8
9 #include <armnn/Descriptors.hpp>
10 #include <armnn/utility/IgnoreUnused.hpp>
11 #include <armnn/utility/PolymorphicDowncast.hpp>
12 #include <armnn/Types.hpp>
13
14 #include <limits>
15
16 namespace armnn
17 {
18
DynamicQuantizationVisitor(RangeTracker & rangeTracker,Graph & graph)19 DynamicQuantizationVisitor::DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph)
20 : m_RangeTracker(rangeTracker),
21 m_Graph(graph)
22 {}
23
SetRange(const IConnectableLayer * layer,unsigned int outputIdx,float min,float max)24 void DynamicQuantizationVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max)
25 {
26 m_RangeTracker.SetRange(layer, outputIdx, min, max);
27 }
28
ForwardParentParameters(const IConnectableLayer * layer)29 void DynamicQuantizationVisitor::ForwardParentParameters(const IConnectableLayer* layer)
30 {
31 for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
32 {
33 const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection();
34 LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid();
35 unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner();
36 const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex);
37 SetRange(layer, i, parentRange.first, parentRange.second);
38 }
39 }
40
AddToCalibratedLayers(const IConnectableLayer * layer)41 void DynamicQuantizationVisitor::AddToCalibratedLayers(const IConnectableLayer* layer)
42 {
43 m_LayersToCalibrate.push_back(layer);
44 }
45
AddToNonCalibratedLayers(const IConnectableLayer * layer)46 void DynamicQuantizationVisitor::AddToNonCalibratedLayers(const IConnectableLayer* layer)
47 {
48 m_LayersNotToCalibrate.push_back(layer);
49 }
50
FinishVisit()51 void DynamicQuantizationVisitor::FinishVisit()
52 {
53 for (const IConnectableLayer* layer : m_LayersToCalibrate)
54 {
55 std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter(
56 m_Graph, *PolymorphicDowncast<Layer*>(const_cast<IConnectableLayer*>(layer)));
57 // record them so we can take them out again efficiently afterward
58 m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers));
59 }
60 }
61
RemoveDebugLayers()62 void DynamicQuantizationVisitor::RemoveDebugLayers()
63 {
64 for (DebugLayer* debugLayer : m_DebugLayers)
65 {
66 OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot();
67 proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0));
68
69 for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections())
70 {
71 debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot);
72 proceedingOutputSlot.Connect(*succeedingInputSlot);
73 }
74 m_Graph.EraseLayer(debugLayer);
75 }
76 m_DebugLayers.clear();
77 }
78
VisitNonCalibratedLayers()79 void DynamicQuantizationVisitor::VisitNonCalibratedLayers() {
80 RemoveDebugLayers();
81 for (const IConnectableLayer* layer : m_LayersNotToCalibrate)
82 {
83 ForwardParentParameters(layer);
84 }
85 }
86
VisitAdditionLayer(const IConnectableLayer * layer,const char * name)87 void DynamicQuantizationVisitor::VisitAdditionLayer(const IConnectableLayer* layer,
88 const char* name)
89 {
90 IgnoreUnused(name);
91 SetRange(layer, 0, -20.f, 20.f);
92 AddToCalibratedLayers(layer);
93 }
94
VisitAbsLayer(const IConnectableLayer * layer,const char * name)95 void DynamicQuantizationVisitor::VisitAbsLayer(const IConnectableLayer* layer,
96 const char* name)
97 {
98 IgnoreUnused(name);
99 SetRange(layer, 0, -20.f, 20.f);
100 AddToCalibratedLayers(layer);
101 }
102
VisitArgMinMaxLayer(const IConnectableLayer * layer,const ArgMinMaxDescriptor & desc,const char * name)103 void DynamicQuantizationVisitor::VisitArgMinMaxLayer(const IConnectableLayer* layer,
104 const ArgMinMaxDescriptor& desc,
105 const char* name)
106 {
107 IgnoreUnused(name);
108 IgnoreUnused(desc);
109 SetRange(layer, 0, -20.f, 20.f);
110 AddToCalibratedLayers(layer);
111 }
112
VisitBatchNormalizationLayer(const IConnectableLayer * layer,const BatchNormalizationDescriptor & desc,const ConstTensor & mean,const ConstTensor & variance,const ConstTensor & beta,const ConstTensor & gamma,const char * name)113 void DynamicQuantizationVisitor::VisitBatchNormalizationLayer(const IConnectableLayer* layer,
114 const BatchNormalizationDescriptor& desc,
115 const ConstTensor& mean,
116 const ConstTensor& variance,
117 const ConstTensor& beta,
118 const ConstTensor& gamma,
119 const char* name)
120 {
121 IgnoreUnused(desc);
122 IgnoreUnused(mean);
123 IgnoreUnused(variance);
124 IgnoreUnused(beta);
125 IgnoreUnused(gamma);
126 IgnoreUnused(name);
127 SetRange(layer, 0, -15.0f, 15.0f);
128 AddToCalibratedLayers(layer);
129 }
130
VisitNormalizationLayer(const IConnectableLayer * layer,const NormalizationDescriptor & desc,const char * name)131 void DynamicQuantizationVisitor::VisitNormalizationLayer(const IConnectableLayer* layer,
132 const NormalizationDescriptor& desc,
133 const char* name)
134 {
135 IgnoreUnused(desc);
136 IgnoreUnused(name);
137 SetRange(layer, 0, -15.0f, 15.0f);
138 AddToCalibratedLayers(layer);
139 }
140
VisitConvolution2dLayer(const IConnectableLayer * layer,const Convolution2dDescriptor & convolution2dDescriptor,const ConstTensor & weights,const Optional<ConstTensor> & biases,const char * name)141 void DynamicQuantizationVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer,
142 const Convolution2dDescriptor& convolution2dDescriptor,
143 const ConstTensor& weights,
144 const Optional<ConstTensor>& biases,
145 const char* name)
146 {
147 IgnoreUnused(convolution2dDescriptor);
148 IgnoreUnused(weights);
149 IgnoreUnused(biases);
150 IgnoreUnused(name);
151 SetRange(layer, 0, -15.0f, 15.0f);
152 AddToCalibratedLayers(layer);
153 }
154
VisitDepthwiseConvolution2dLayer(const IConnectableLayer * layer,const DepthwiseConvolution2dDescriptor & desc,const ConstTensor & weights,const Optional<ConstTensor> & biases,const char * name)155 void DynamicQuantizationVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
156 const DepthwiseConvolution2dDescriptor& desc,
157 const ConstTensor& weights,
158 const Optional<ConstTensor>& biases,
159 const char* name)
160 {
161 IgnoreUnused(desc);
162 IgnoreUnused(weights);
163 IgnoreUnused(biases);
164 IgnoreUnused(name);
165 SetRange(layer, 0, -15.0f, 15.0f);
166 AddToCalibratedLayers(layer);
167 }
168
VisitActivationLayer(const IConnectableLayer * layer,const ActivationDescriptor & activationDescriptor,const char * name)169 void DynamicQuantizationVisitor::VisitActivationLayer(const IConnectableLayer* layer,
170 const ActivationDescriptor& activationDescriptor,
171 const char* name)
172 {
173 IgnoreUnused(name, activationDescriptor);
174 switch (activationDescriptor.m_Function)
175 {
176 // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu
177 case ActivationFunction::Abs:
178 case ActivationFunction::Linear:
179 case ActivationFunction::ReLu:
180 case ActivationFunction::SoftReLu:
181 SetRange(layer, 0, 0.f, 15.f);
182 break;
183 case ActivationFunction::BoundedReLu:
184 SetRange(layer, 0, 0.f, activationDescriptor.m_A);
185 break;
186 case ActivationFunction::TanH:
187 SetRange(layer, 0, -1.f, 1.f);
188 break;
189 case ActivationFunction::LeakyReLu:
190 SetRange(layer, 0, -5.f, 15.f);
191 break;
192 default:
193 SetRange(layer, 0, -15.f, 15.f);
194 break;
195 }
196 AddToCalibratedLayers(layer);
197 }
198
VisitFullyConnectedLayer(const IConnectableLayer * layer,const FullyConnectedDescriptor & desc,const ConstTensor & weights,const Optional<ConstTensor> & biases,const char * name)199 void DynamicQuantizationVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer,
200 const FullyConnectedDescriptor& desc,
201 const ConstTensor& weights,
202 const Optional<ConstTensor>& biases,
203 const char *name)
204 {
205 IgnoreUnused(desc);
206 IgnoreUnused(weights);
207 IgnoreUnused(biases);
208 IgnoreUnused(name);
209 SetRange(layer, 0, -15.0f, 15.0f);
210 AddToCalibratedLayers(layer);
211 }
212
VisitPermuteLayer(const IConnectableLayer * layer,const PermuteDescriptor & permuteDescriptor,const char * name)213 void DynamicQuantizationVisitor::VisitPermuteLayer(const IConnectableLayer* layer,
214 const PermuteDescriptor& permuteDescriptor,
215 const char* name)
216 {
217 IgnoreUnused(permuteDescriptor);
218 IgnoreUnused(name);
219 AddToNonCalibratedLayers(layer);
220 }
221
VisitSpaceToBatchNdLayer(const IConnectableLayer * layer,const SpaceToBatchNdDescriptor & spaceToBatchNdDescriptor,const char * name)222 void DynamicQuantizationVisitor::VisitSpaceToBatchNdLayer(const IConnectableLayer* layer,
223 const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor,
224 const char* name)
225 {
226 IgnoreUnused(spaceToBatchNdDescriptor);
227 IgnoreUnused(name);
228 AddToNonCalibratedLayers(layer);
229 }
230
VisitPooling2dLayer(const IConnectableLayer * layer,const Pooling2dDescriptor & pooling2dDescriptor,const char * name)231 void DynamicQuantizationVisitor::VisitPooling2dLayer(const IConnectableLayer* layer,
232 const Pooling2dDescriptor& pooling2dDescriptor,
233 const char* name)
234 {
235 IgnoreUnused(pooling2dDescriptor);
236 IgnoreUnused(name);
237 AddToNonCalibratedLayers(layer);
238 }
239
VisitSoftmaxLayer(const IConnectableLayer * layer,const SoftmaxDescriptor & softmaxDescriptor,const char * name)240 void DynamicQuantizationVisitor::VisitSoftmaxLayer(const IConnectableLayer* layer,
241 const SoftmaxDescriptor& softmaxDescriptor,
242 const char* name)
243 {
244 IgnoreUnused(softmaxDescriptor);
245 IgnoreUnused(name);
246 SetRange(layer, 0, 0.f, 1.f);
247 AddToCalibratedLayers(layer);
248 }
249
VisitConstantLayer(const IConnectableLayer * layer,const ConstTensor & input,const char * name)250 void DynamicQuantizationVisitor::VisitConstantLayer(const IConnectableLayer* layer,
251 const ConstTensor& input,
252 const char* name)
253 {
254 IgnoreUnused(name);
255
256 if (input.GetDataType() != DataType::Float32)
257 {
258 throw InvalidArgumentException("Quantization is supported only for FP32 tensors");
259 }
260
261 // Work out the range based on the input constants
262 unsigned int inputNumElements = input.GetNumElements();
263 const float* inputData = reinterpret_cast<const float*>(input.GetMemoryArea());
264
265 float min = std::numeric_limits<float>::max();
266 float max = std::numeric_limits<float>::lowest();
267
268 for (unsigned int i = 0; i < inputNumElements; i++)
269 {
270 const float inputValue = inputData[i];
271
272 min = std::min(min, inputValue);
273 max = std::max(max, inputValue);
274 }
275 SetRange(layer, 0, min, max);
276 }
277
VisitConcatLayer(const IConnectableLayer * layer,const ConcatDescriptor & originsDescriptor,const char * name)278 void DynamicQuantizationVisitor::VisitConcatLayer(const IConnectableLayer* layer,
279 const ConcatDescriptor& originsDescriptor,
280 const char* name)
281 {
282 IgnoreUnused(name);
283 IgnoreUnused(originsDescriptor);
284 float min = std::numeric_limits<float>::max();
285 float max = std::numeric_limits<float>::lowest();
286 for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i)
287 {
288 const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection();
289 LayerGuid layerId = outputSlot->GetOwningLayerGuid();
290 unsigned int slotIndex = outputSlot->CalculateIndexOnOwner();
291 RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex);
292 min = std::min(min, range.first);
293 max = std::max(max, range.second);
294 }
295 SetRange(layer, 0, min, max);
296 AddToCalibratedLayers(layer);
297 }
298
VisitReshapeLayer(const IConnectableLayer * layer,const ReshapeDescriptor & reshapeDescriptor,const char * name)299 void DynamicQuantizationVisitor::VisitReshapeLayer(const IConnectableLayer* layer,
300 const ReshapeDescriptor& reshapeDescriptor,
301 const char* name)
302 {
303 IgnoreUnused(reshapeDescriptor);
304 IgnoreUnused(name);
305 AddToNonCalibratedLayers(layer);
306 }
307
VisitSplitterLayer(const IConnectableLayer * layer,const SplitterDescriptor & splitterDescriptor,const char * name)308 void DynamicQuantizationVisitor::VisitSplitterLayer(const IConnectableLayer* layer,
309 const SplitterDescriptor& splitterDescriptor,
310 const char* name)
311 {
312 IgnoreUnused(splitterDescriptor);
313 IgnoreUnused(name);
314 AddToNonCalibratedLayers(layer);
315 }
316
VisitResizeBilinearLayer(const IConnectableLayer * layer,const ResizeBilinearDescriptor & resizeDesc,const char * name)317 void DynamicQuantizationVisitor::VisitResizeBilinearLayer(const IConnectableLayer* layer,
318 const ResizeBilinearDescriptor& resizeDesc,
319 const char* name)
320 {
321 IgnoreUnused(resizeDesc);
322 IgnoreUnused(name);
323 AddToNonCalibratedLayers(layer);
324 }
325
VisitStridedSliceLayer(const IConnectableLayer * layer,const StridedSliceDescriptor & stridedSliceDescriptor,const char * name)326 void DynamicQuantizationVisitor::VisitStridedSliceLayer(const IConnectableLayer* layer,
327 const StridedSliceDescriptor& stridedSliceDescriptor,
328 const char* name)
329 {
330 IgnoreUnused(stridedSliceDescriptor);
331 IgnoreUnused(name);
332 AddToNonCalibratedLayers(layer);
333 }
334
VisitBatchToSpaceNdLayer(const IConnectableLayer * layer,const BatchToSpaceNdDescriptor & batchToSpaceNdDescriptor,const char * name)335 void DynamicQuantizationVisitor::VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
336 const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
337 const char* name)
338 {
339 IgnoreUnused(batchToSpaceNdDescriptor);
340 IgnoreUnused(name);
341 AddToNonCalibratedLayers(layer);
342 }
343
VisitInputLayer(const IConnectableLayer * layer,LayerBindingId id,const char * name)344 void DynamicQuantizationVisitor::VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
345 {
346 IgnoreUnused(id);
347 IgnoreUnused(name);
348 SetRange(layer, 0, -0.0f, 0.0f);
349 AddToCalibratedLayers(layer);
350 }
351
VisitOutputLayer(const IConnectableLayer * layer,LayerBindingId id,const char * name)352 void DynamicQuantizationVisitor::VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name)
353 {
354 IgnoreUnused(id);
355 IgnoreUnused(name);
356 AddToNonCalibratedLayers(layer);
357 m_OutputLayers.push_back(id);
358 }
359
GetOutputLayers()360 const std::vector<LayerBindingId>& DynamicQuantizationVisitor::GetOutputLayers()
361 {
362 return m_OutputLayers;
363 }
364
365 } //namespace armnn
366