1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #include "NetworkQuantizer.hpp"
7 #include "NetworkQuantizerUtils.hpp"
8 #include "Graph.hpp"
9 #include "Layer.hpp"
10 #include "Network.hpp"
11 #include "DynamicQuantizationVisitor.hpp"
12 #include "StaticRangeVisitor.hpp"
13 #include "QuantizerVisitor.hpp"
14 #include "OverrideInputRangeVisitor.hpp"
15
16 #include <TensorIOUtils.hpp>
17
18 #include <armnn/ILayerVisitor.hpp>
19 #include <armnn/INetwork.hpp>
20 #include <armnn/Tensor.hpp>
21 #include <armnn/Types.hpp>
22
23 #include <armnnUtils/TensorUtils.hpp>
24 #include <armnn/utility/PolymorphicDowncast.hpp>
25
26 #include <mapbox/variant.hpp>
27
28 #include <vector>
29 #include <cmath>
30
31 namespace armnn
32 {
33
34 using TContainer = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;
35
CreateRaw(INetwork * inputNetwork,const QuantizerOptions & options)36 INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options)
37 {
38 return new NetworkQuantizer(inputNetwork, options);
39 }
40
Create(INetwork * inputNetwork,const QuantizerOptions & options)41 INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork, const QuantizerOptions& options)
42 {
43 return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy);
44 }
45
Destroy(INetworkQuantizer * quantizer)46 void INetworkQuantizer::Destroy(INetworkQuantizer *quantizer)
47 {
48 delete PolymorphicDowncast<NetworkQuantizer*>(quantizer);
49 }
50
OverrideInputRange(LayerBindingId layerId,float min,float max)51 void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max)
52 {
53 const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph();
54 auto inputLayers = graph.GetInputLayers();
55
56 // Walk the input layers of the graph and override the quantization parameters of the one with the given id
57 OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
58 VisitLayers(inputLayers, overrideInputRangeVisitor);
59 }
60
Refine(const InputTensors & inputTensors)61 void NetworkQuantizer::Refine(const InputTensors& inputTensors)
62 {
63 // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
64 // will not have been created. Need to get the environment set up, Runtime loaded,
65 // DynamicQuantizationVisitor created and run over the network to initialise itself
66 // and the RangeTracker the Debug callback registered and an initial inference
67 // done to set up the first min/max values
68 if (!m_Runtime)
69 {
70 m_RefineCount = 0;
71 m_Ranges.SetDynamicMode(true);
72 const Graph& cGraph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
73
74 // need to insert Debug layers in the DynamicQuantizationVisitor
75 Graph& graph = const_cast<Graph&>(cGraph);
76
77 // Initialize RangeTracker to the default values for each layer.
78 // The default values are overwritten by the min/max that is
79 // recorded during the first dataset min/max calibration. This
80 // initialisation is only required for the first call of Refine().
81 m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
82 VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
83
84 IRuntime::CreationOptions options;
85 m_Runtime = IRuntime::Create(options);
86
87 // Optimize network - debug already enabled for layers that require quantization
88 OptimizerOptions optimizerOptions(false, false);
89 std::vector<BackendId> backends = {"CpuRef"};
90 IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
91 backends,
92 m_Runtime->GetDeviceSpec(),
93 optimizerOptions);
94
95 m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
96
97 // Debug callback function to refine min/max in RangeTracker
98 auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
99 // Get min/max pair from tensor data
100 std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
101
102 // For first calibration dataset, set min/max range in RangeTracker to
103 // min/max ranges gathered during inference
104 if (m_RefineCount == 0)
105 {
106 m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
107 }
108 else
109 {
110 // For every other calibration dataset, only set min/max range if the
111 // values gathered are less than / greater than originally recorded.
112 m_Ranges.RefineMin(guid, slotIndex, minMax.first);
113 m_Ranges.RefineMax(guid, slotIndex, minMax.second);
114 }
115 };
116
117 m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
118 }
119
120 // Create output tensor for EnqueueWorkload
121 std::vector<armnn::BindingPointInfo> outputBindings;
122 auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
123 std::vector<TContainer> outputVectors;
124 for (auto outputLayerBindingId : outputLayers)
125 {
126 auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
127 outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
128 outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
129 }
130 OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
131
132 // Execute EnqueueWorkload with calibration image
133 m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
134 ++m_RefineCount;
135 }
136
ExportNetwork()137 INetworkPtr NetworkQuantizer::ExportNetwork()
138 {
139 const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
140
141 // Step 1) Walk the graph and populate default min/max values for
142 // intermediate tensors, only if Runtime does not exist (created
143 // if Refine has been called)
144 if (!m_Runtime)
145 {
146 m_Ranges.SetDynamicMode(false);
147 StaticRangeVisitor rangeVisitor(m_Ranges);
148 VisitLayers(graph, rangeVisitor);
149 }
150 else
151 {
152 // Set min/max range of non-calibrated layers to parent layer's range
153 m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
154 // now tear down the runtime and the dynamic visitor.
155 m_Runtime.reset(nullptr);
156 m_DynamicQuantizationVisitor = EmptyOptional();
157 m_RefineCount = 0;
158 }
159
160 // Step 2) Convert input InputNetwork to Quantized InputNetwork
161 std::unique_ptr<IQuantizationScheme> quantizationScheme;
162 switch (m_Options.m_ActivationFormat)
163 {
164 case DataType::QAsymmU8:
165 quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
166 break;
167 case DataType::QAsymmS8:
168 quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
169 break;
170 case DataType::QSymmS8:
171 quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
172 break;
173 case DataType::QSymmS16:
174 quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
175 break;
176 default:
177 throw InvalidArgumentException("Unsupported quantization target");
178 }
179
180 QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
181 VisitLayers(graph, quantizerVisitor);
182
183 // clear the ranges
184 m_Ranges.Reset();
185
186 return quantizerVisitor.RetrieveFinalNetwork();
187 }
188
} //namespace armnn
190