//
// Copyright © 2022, 2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include <armnn/ArmNN.hpp>
#include <armnn/Logging.hpp>
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>
#include <DelegateUtils.hpp>
#include <Profiling.hpp>
#include <tensorflow/lite/builtin_ops.h>
#include <tensorflow/lite/c/builtin_op_data.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/kernels/builtin_op_kernels.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>

#include <cassert>
#include <cstring>
#include <memory>
#include <string>
#include <vector>

namespace common
{
/**
* @brief Used to load a network through the TfLite Interpreter, register the
*        Arm NN delegate with it, and run inference on it against a given
*        backend.
*        It is currently assumed that the input data is a cv::Mat (frame);
*        this assumption is implemented in the PrepareTensors method and can
*        be generalised later.
*/
template <typename Tout>
class ArmnnNetworkExecutor
{
private:
    std::unique_ptr<tflite::Interpreter> m_interpreter;
    std::unique_ptr<tflite::FlatBufferModel> m_model;
    Profiling m_profiling;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
    -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
    * @brief Loads the model and prepares the TfLite interpreter with the
    *        Arm NN delegate registered against the given backends.
    *
    * @param[in] modelPath - Relative path to the model file
    * @param[in] backends - The list of preferred backends to run inference on
    * @param[in] isProfilingEnabled - Whether to print profiling timings
    */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends,
                         bool isProfilingEnabled = false);

    /**
    * @brief Returns the aspect ratio of the associated model in the order of width, height.
    */
    Size GetImageAspectRatio();

    /**
    * @brief Returns the data type of the model's input tensor.
    */
    armnn::DataType GetInputDataType() const;

    float GetQuantizationScale();

    int GetQuantizationOffset();

    float GetOutputQuantizationScale(int tensorIndex);

    int GetOutputQuantizationOffset(int tensorIndex);

    /**
    * @brief Runs inference on the provided input data and stores the results
    * in the provided InferenceResults object.
    *
    * @param[in] inputData - Input frame data
    * @param[in] dataBytes - Input data size in bytes
    * @param[out] outResults - Vector of InferenceResult objects used to store the output results.
    */
    bool Run(const void* inputData, const size_t dataBytes,
             InferenceResults<Tout>& outResults);
};
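
/*
* Example usage (a minimal sketch; the model path, backend list and frame
* below are illustrative placeholders). The buffer passed to Run() is assumed
* to be contiguous and already pre-processed, e.g. the data pointer of a
* cv::Mat resized/converted to the model's input shape and type:
*
*     std::string modelPath = "model.tflite";
*     std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc,
*                                                armnn::Compute::CpuRef };
*     common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
*
*     cv::Mat frame;  // assumed already pre-processed for the model
*     common::InferenceResults<float> results;
*     if (executor.Run(frame.data, frame.total() * frame.elemSize(), results))
*     {
*         // results[i] holds the data of output tensor i
*     }
*/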

template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                           std::vector<armnn::BackendId>& preferredBackends,
                                           bool isProfilingEnabled):
                                           m_profiling(isProfilingEnabled)
{
    m_profiling.ProfilingStart();
    armnn::OptimizerOptionsOpaque optimizerOptions;
    m_model = tflite::FlatBufferModel::BuildFromFile(modelPath.c_str());
    if (m_model == nullptr)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to build the model"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Loading the model took");

    m_profiling.ProfilingStart();
    tflite::ops::builtin::BuiltinOpResolver resolver;
    tflite::InterpreterBuilder(*m_model, resolver)(&m_interpreter);
    if (m_interpreter == nullptr || m_interpreter->AllocateTensors() != kTfLiteOk)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to create the interpreter or allocate tensors"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Create the tflite interpreter");

    /* Create the delegate options */
    m_profiling.ProfilingStart();

    /* Enable the fast math optimization */
    armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.AddModelOption(modelOptionGpu);

    armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.AddModelOption(modelOptionCpu);
    /* Enable the reduce-float32-to-float16 optimization */
    optimizerOptions.SetReduceFp32ToFp16(true);

    armnnDelegate::DelegateOptions delegateOptions(preferredBackends, optimizerOptions);

    /* Create the delegate object */
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
                theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                                 armnnDelegate::TfLiteArmnnDelegateDelete);

    /* Register the Arm NN delegate with the interpreter */
    if (m_interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)) != kTfLiteOk)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to register the Arm NN delegate"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Create and load ArmNN Delegate");
}

template<typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    size_t inputTensorSize = m_interpreter->input_tensor(0)->bytes;
    auto* inputTensorPtr = m_interpreter->input_tensor(0)->data.raw;
    assert(inputTensorSize >= dataBytes);
    if (inputTensorPtr != nullptr)
    {
        /* Copy only the bytes provided by the caller; the assert above
           guarantees they fit into the input tensor. */
        memcpy(inputTensorPtr, inputData, dataBytes);
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: input tensor is null"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
}

template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes,
                                             InferenceResults<Tout>& outResults)
{
    bool ret = false;
    m_profiling.ProfilingStart();
    PrepareTensors(inputData, dataBytes);

    if (m_interpreter->Invoke() == kTfLiteOk)
    {
        ret = true;
        // Extract the output tensor data.
        outResults.clear();
        outResults.reserve(m_interpreter->outputs().size());
        for (int index = 0; index < m_interpreter->outputs().size(); index++)
        {
            size_t size = m_interpreter->output_tensor(index)->bytes / sizeof(Tout);
            const Tout* p_Output = m_interpreter->typed_output_tensor<Tout>(index);
            if (p_Output != nullptr)
            {
                InferenceResult<Tout> outRes(p_Output, p_Output + size);
                outResults.emplace_back(outRes);
            }
            else
            {
                const std::string errorMessage{"ArmnnNetworkExecutor: p_Output tensor is null"};
                ARMNN_LOG(error) << errorMessage;
                ret = false;
            }
        }
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Invoke has failed"};
        ARMNN_LOG(error) << errorMessage;
    }
    m_profiling.ProfilingStopAndPrintUs("Perform inference");
    return ret;
}

template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    assert(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->size == 4);
    return Size(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[2],
                m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[1]);
}

template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return GetDataType(*(m_interpreter->tensor(m_interpreter->inputs()[0])));
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.zero_point;
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > tensorIndex);
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > tensorIndex);
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.zero_point;
}

}// namespace common