//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NetworkQuantizer.hpp"
#include "NetworkQuantizerUtils.hpp"
#include "Graph.hpp"
#include "Layer.hpp"
#include "Network.hpp"
#include "DynamicQuantizationVisitor.hpp"
#include "StaticRangeVisitor.hpp"
#include "QuantizerVisitor.hpp"
#include "OverrideInputRangeVisitor.hpp"

#include <TensorIOUtils.hpp>

#include <armnn/ILayerVisitor.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

#include <armnnUtils/TensorUtils.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <mapbox/variant.hpp>

#include <vector>
#include <cmath>

namespace armnn
{

using TContainer = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>>;

CreateRaw(INetwork * inputNetwork,const QuantizerOptions & options)36 INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options)
37 {
38     return new NetworkQuantizer(inputNetwork, options);
39 }
40 
Create(INetwork * inputNetwork,const QuantizerOptions & options)41 INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork, const QuantizerOptions& options)
42 {
43     return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy);
44 }
45 
Destroy(INetworkQuantizer * quantizer)46 void INetworkQuantizer::Destroy(INetworkQuantizer *quantizer)
47 {
48     delete PolymorphicDowncast<NetworkQuantizer*>(quantizer);
49 }
50 
OverrideInputRange(LayerBindingId layerId,float min,float max)51 void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max)
52 {
53     const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph();
54     auto inputLayers = graph.GetInputLayers();
55 
56     // Walk the input layers of the graph and override the quantization parameters of the one with the given id
57     OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max});
58     VisitLayers(inputLayers, overrideInputRangeVisitor);
59 }
60 
Refine(const InputTensors & inputTensors)61 void NetworkQuantizer::Refine(const InputTensors& inputTensors)
62 {
63     // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor
64     // will not have been created. Need to get the environment set up, Runtime loaded,
65     // DynamicQuantizationVisitor created and run over the network to initialise itself
66     // and the RangeTracker the Debug callback registered and an initial inference
67     // done to set up the first min/max values
68     if (!m_Runtime)
69     {
70         m_RefineCount = 0;
71         m_Ranges.SetDynamicMode(true);
72         const Graph& cGraph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
73 
74         // need to insert Debug layers in the DynamicQuantizationVisitor
75         Graph& graph = const_cast<Graph&>(cGraph);
76 
77         // Initialize RangeTracker to the default values for each layer.
78         // The default values are overwritten by the min/max that is
79         // recorded during the first dataset min/max calibration. This
80         // initialisation is only required for the first call of Refine().
81         m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph);
82         VisitLayers(cGraph, m_DynamicQuantizationVisitor.value());
83 
84         IRuntime::CreationOptions options;
85         m_Runtime = IRuntime::Create(options);
86 
87         // Optimize network - debug already enabled for layers that require quantization
88         OptimizerOptions optimizerOptions(false, false);
89         std::vector<BackendId> backends = {"CpuRef"};
90         IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork,
91                                                      backends,
92                                                      m_Runtime->GetDeviceSpec(),
93                                                      optimizerOptions);
94 
95         m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet));
96 
97         // Debug callback function to refine min/max in RangeTracker
98         auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) {
99             // Get min/max pair from tensor data
100             std::pair<float, float> minMax = armnnUtils::FindMinMax(tensorHandle);
101 
102             // For first calibration dataset, set min/max range in RangeTracker to
103             // min/max ranges gathered during inference
104             if (m_RefineCount == 0)
105             {
106                 m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second);
107             }
108             else
109             {
110                 // For every other calibration dataset, only set min/max range if the
111                 // values gathered are less than / greater than originally recorded.
112                 m_Ranges.RefineMin(guid, slotIndex, minMax.first);
113                 m_Ranges.RefineMax(guid, slotIndex, minMax.second);
114             }
115         };
116 
117         m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback);
118     }
119 
120     // Create output tensor for EnqueueWorkload
121     std::vector<armnn::BindingPointInfo> outputBindings;
122     auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers();
123     std::vector<TContainer> outputVectors;
124     for (auto outputLayerBindingId : outputLayers)
125     {
126         auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId);
127         outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo));
128         outputVectors.push_back(std::vector<float>(outputTensorInfo.GetNumElements(), 0));
129     }
130     OutputTensors outputTensors = armnnUtils::MakeOutputTensors<TContainer>(outputBindings, outputVectors);
131 
132     // Execute EnqueueWorkload with calibration image
133     m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
134     ++m_RefineCount;
135 }
136 
ExportNetwork()137 INetworkPtr NetworkQuantizer::ExportNetwork()
138 {
139     const Graph& graph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort();
140 
141     // Step 1) Walk the graph and populate default min/max values for
142     // intermediate tensors, only if Runtime does not exist (created
143     // if Refine has been called)
144     if (!m_Runtime)
145     {
146         m_Ranges.SetDynamicMode(false);
147         StaticRangeVisitor rangeVisitor(m_Ranges);
148         VisitLayers(graph, rangeVisitor);
149     }
150     else
151     {
152         // Set min/max range of non-calibrated layers to parent layer's range
153         m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers();
154         // now tear down the runtime and the dynamic visitor.
155         m_Runtime.reset(nullptr);
156         m_DynamicQuantizationVisitor = EmptyOptional();
157         m_RefineCount = 0;
158     }
159 
160     // Step 2) Convert input InputNetwork to Quantized InputNetwork
161     std::unique_ptr<IQuantizationScheme> quantizationScheme;
162     switch (m_Options.m_ActivationFormat)
163     {
164         case DataType::QAsymmU8:
165             quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
166             break;
167         case DataType::QAsymmS8:
168             quantizationScheme = std::make_unique<QAsymmS8QuantizationScheme>();
169             break;
170         case DataType::QSymmS8:
171             quantizationScheme = std::make_unique<QSymmS8QuantizationScheme>();
172             break;
173         case DataType::QSymmS16:
174             quantizationScheme = std::make_unique<QSymm16QuantizationScheme>();
175             break;
176         default:
177             throw InvalidArgumentException("Unsupported quantization target");
178     }
179 
180     QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType);
181     VisitLayers(graph, quantizerVisitor);
182 
183     // clear the ranges
184     m_Ranges.Reset();
185 
186     return quantizerVisitor.RetrieveFinalNetwork();
187 }
188 
} // namespace armnn