//
// Copyright © 2022, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include <armnn/Logging.hpp>
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>
#include <DelegateUtils.hpp>
#include <Profiling.hpp>
#include <tensorflow/lite/builtin_ops.h>
#include <tensorflow/lite/c/builtin_op_data.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/kernels/builtin_op_kernels.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>

#include <cassert>
#include <cstring>
#include <memory>
#include <string>
#include <type_traits>
#include <vector>

namespace common
{
/**
 * @brief Loads a network through the TfLite Interpreter, registers the
 *        Arm NN delegate with it, and runs inference on it against a given
 *        set of backends.
 *
 *        Currently the input data is assumed to be a cv::Mat frame; this
 *        assumption is implemented in the PrepareTensors method and can be
 *        generalised later.
 *
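 * A minimal usage sketch (illustrative only; it assumes Types.hpp provides
 * InferenceResults<T>, and that the caller supplies a raw frame pointer
 * plus its size in bytes):
 *
 * @code
 *   std::string modelPath = "model.tflite";
 *   std::vector<armnn::BackendId> backends = { armnn::Compute::GpuAcc,
 *                                              armnn::Compute::CpuAcc };
 *   common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
 *
 *   common::InferenceResults<float> results;
 *   executor.Run(frameData, frameSizeInBytes, results);
 * @endcode
 */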
template <typename Tout>
class ArmnnNetworkExecutor
{
private:
    std::unique_ptr<tflite::Interpreter> m_interpreter;
    std::unique_ptr<tflite::FlatBufferModel> m_model;
    Profiling m_profiling;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
    -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
     * @brief Constructs the executor: loads the TfLite model, creates the
     *        interpreter and registers the Arm NN delegate for the given
     *        preferred backends.
     *
     * @param[in] modelPath - Relative path to the model file
     * @param[in] backends - The list of preferred backends to run inference on
     * @param[in] isProfilingEnabled - Enables timing of model loading and inference
     */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends,
                         bool isProfilingEnabled = false);

    /**
     * @brief Returns the width and height of the model's input image, in that order.
     */
    Size GetImageAspectRatio();

    /**
     * @brief Returns the data type of the model's input tensor.
     */
    armnn::DataType GetInputDataType() const;

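    /**
     * @brief Returns the quantization scale of the model's input tensor.
     */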
    float GetQuantizationScale();

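    /**
     * @brief Returns the quantization offset (zero point) of the model's input tensor.
     */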
    int GetQuantizationOffset();

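    /**
     * @brief Returns the quantization scale of the output tensor at the given index.
     */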
    float GetOutputQuantizationScale(int tensorIndex);

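    /**
     * @brief Returns the quantization offset (zero point) of the output tensor at the given index.
     */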
    int GetOutputQuantizationOffset(int tensorIndex);

    /**
     * @brief Runs inference on the provided input data and stores the results
     *        in the provided InferenceResults object.
     *
     * @param[in] inputData - Input frame data
     * @param[in] dataBytes - Input data size in bytes
     * @param[out] outResults - Vector of InferenceResult objects used to store the output results.
     */
    bool Run(const void *inputData, const size_t dataBytes,
             InferenceResults<Tout>& outResults);
};

template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends,
                                                 bool isProfilingEnabled):
        m_profiling(isProfilingEnabled)
{
    m_profiling.ProfilingStart();
    armnn::OptimizerOptionsOpaque optimizerOptions;
    m_model = tflite::FlatBufferModel::BuildFromFile(modelPath.c_str());
    if (m_model == nullptr)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to build the model"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Loading the model took");

    m_profiling.ProfilingStart();
    tflite::ops::builtin::BuiltinOpResolver resolver;
    tflite::InterpreterBuilder(*m_model, resolver)(&m_interpreter);
    if (m_interpreter->AllocateTensors() != kTfLiteOk)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to alloc tensors"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Create the tflite interpreter");

    /* create delegate options */
    m_profiling.ProfilingStart();

    /* enable fast math optimization */
    armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.AddModelOption(modelOptionGpu);

    armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.AddModelOption(modelOptionCpu);
    /* enable reduce float32 to float16 optimization */
    optimizerOptions.SetReduceFp32ToFp16(true);

    armnnDelegate::DelegateOptions delegateOptions(preferredBackends, optimizerOptions);

    /* create delegate object */
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                             armnnDelegate::TfLiteArmnnDelegateDelete);

    /* Register the Arm NN delegate with the interpreter */
    m_interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
    m_profiling.ProfilingStopAndPrintUs("Create and load ArmNN Delegate");
}

template<typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void *inputData, const size_t dataBytes)
{
    size_t inputTensorSize = m_interpreter->input_tensor(0)->bytes;
    auto * inputTensorPtr = m_interpreter->input_tensor(0)->data.raw;
    assert(inputTensorSize >= dataBytes);
    if (inputTensorPtr != nullptr)
    {
        /* Copy only the bytes provided by the caller to avoid reading past the end of inputData. */
        memcpy(inputTensorPtr, inputData, dataBytes);
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: input tensor is null"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
}

template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void *inputData, const size_t dataBytes,
                                     InferenceResults<Tout>& outResults)
{
    bool ret = false;
    m_profiling.ProfilingStart();
    PrepareTensors(inputData, dataBytes);

    if (m_interpreter->Invoke() == kTfLiteOk)
    {
        ret = true;
        // Extract the output tensor data.
        outResults.clear();
        outResults.reserve(m_interpreter->outputs().size());
        for (int index = 0; index < m_interpreter->outputs().size(); index++)
        {
            size_t size = m_interpreter->output_tensor(index)->bytes / sizeof(Tout);
            const Tout *p_Output = m_interpreter->typed_output_tensor<Tout>(index);
            if (p_Output != nullptr)
            {
                InferenceResult<Tout> outRes(p_Output, p_Output + size);
                outResults.emplace_back(outRes);
            }
            else
            {
                const std::string errorMessage{"ArmnnNetworkExecutor: p_Output tensor is null"};
                ARMNN_LOG(error) << errorMessage;
                ret = false;
            }
        }
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Invoke has failed"};
        ARMNN_LOG(error) << errorMessage;
    }
    m_profiling.ProfilingStopAndPrintUs("Perform inference");
    return ret;
}

template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    assert(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->size == 4);
    return Size(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[2],
                m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[1]);
}

template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return GetDataType(*(m_interpreter->tensor(m_interpreter->inputs()[0])));
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.zero_point;
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > tensorIndex);
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > tensorIndex);
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.zero_point;
}

} // namespace common