1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5
6 #define LOG_TAG "ArmnnDriver"
7
8 #include "ArmnnPreparedModel.hpp"
9 #include "Utils.hpp"
10
11 #include <log/log.h>
12 #include <OperationsUtils.h>
13 #include <ValidateHal.h>
14
15 #include <cassert>
16 #include <cinttypes>
17
18 using namespace android;
19
20 namespace
21 {
22 using namespace armnn_driver;
23
NotifyCallbackAndCheck(const::android::sp<V1_0::IExecutionCallback> & callback,V1_0::ErrorStatus errorStatus,std::string callingFunction)24 void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callback, V1_0::ErrorStatus errorStatus,
25 std::string callingFunction)
26 {
27 Return<void> returned = callback->notify(errorStatus);
28 // This check is required, if the callback fails and it isn't checked it will bring down the service
29 if (!returned.isOk())
30 {
31 ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s",
32 callingFunction.c_str(), returned.description().c_str());
33 }
34 }
35
ValidateRequestArgument(const V1_0::RequestArgument & requestArg,const armnn::TensorInfo & tensorInfo)36 bool ValidateRequestArgument(const V1_0::RequestArgument& requestArg, const armnn::TensorInfo& tensorInfo)
37 {
38 if (requestArg.dimensions.size() != 0)
39 {
40 if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions())
41 {
42 ALOGE("Mismatched dimensions (request argument: %zu, expected: %u)",
43 requestArg.dimensions.size(), tensorInfo.GetNumDimensions());
44 return false;
45 }
46
47 for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d)
48 {
49 if (requestArg.dimensions[d] != 0 && requestArg.dimensions[d] != tensorInfo.GetShape()[d])
50 {
51 ALOGE("Mismatched size for dimension %d (request argument: %u, expected %u)",
52 d, requestArg.dimensions[d], tensorInfo.GetShape()[d]);
53 return false;
54 }
55 }
56 }
57
58 return true;
59 }
60
GetTensorForRequestArgument(const V1_0::RequestArgument & requestArg,const armnn::TensorInfo & tensorInfo,const std::vector<::android::nn::RunTimePoolInfo> & requestPools)61 armnn::Tensor GetTensorForRequestArgument(const V1_0::RequestArgument& requestArg,
62 const armnn::TensorInfo& tensorInfo,
63 const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
64 {
65 if (!ValidateRequestArgument(requestArg, tensorInfo))
66 {
67 return armnn::Tensor();
68 }
69
70 return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools));
71 }
72
/// Returns @p tensorNamePrefix immediately followed by the decimal representation of @p index.
///
/// @param tensorNamePrefix Null-terminated prefix string.
/// @param index            Index appended to the prefix.
inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    std::string tensorName(tensorNamePrefix);
    tensorName += std::to_string(index);
    return tensorName;
}
77
78 } // anonymous namespace
79
80 using namespace android::hardware;
81
82 namespace armnn_driver
83 {
// Out-of-class definition of the m_RequestThread data member of ArmnnPreparedModel<HalVersion>.
// execute() posts its prepared requests to this object through PostMsg().
template<typename HalVersion>
RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0>
ArmnnPreparedModel<HalVersion>::m_RequestThread;
87
88 template<typename HalVersion>
89 template <typename TensorBindingCollection>
DumpTensorsIfRequired(char const * tensorNamePrefix,const TensorBindingCollection & tensorBindings)90 void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
91 const TensorBindingCollection& tensorBindings)
92 {
93 if (!m_RequestInputsAndOutputsDumpDir.empty())
94 {
95 const std::string requestName = std::to_string(m_NetworkId) + "_" + std::to_string(m_RequestCount) + ".dump";
96 for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
97 {
98 DumpTensor(m_RequestInputsAndOutputsDumpDir,
99 requestName,
100 BuildTensorName(tensorNamePrefix, i),
101 tensorBindings[i].second);
102 }
103 }
104 }
105
106 template<typename HalVersion>
ArmnnPreparedModel(armnn::NetworkId networkId,armnn::IRuntime * runtime,const HalModel & model,const std::string & requestInputsAndOutputsDumpDir,const bool gpuProfilingEnabled)107 ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
108 armnn::IRuntime* runtime,
109 const HalModel& model,
110 const std::string& requestInputsAndOutputsDumpDir,
111 const bool gpuProfilingEnabled)
112 : m_NetworkId(networkId)
113 , m_Runtime(runtime)
114 , m_Model(model)
115 , m_RequestCount(0)
116 , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
117 , m_GpuProfilingEnabled(gpuProfilingEnabled)
118 {
119 // Enable profiling if required.
120 m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
121 }
122
123 template<typename HalVersion>
~ArmnnPreparedModel()124 ArmnnPreparedModel<HalVersion>::~ArmnnPreparedModel()
125 {
126 // Get a hold of the profiler used by this model.
127 std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
128
129 // Unload the network associated with this model.
130 m_Runtime->UnloadNetwork(m_NetworkId);
131
132 // Dump the profiling info to a file if required.
133 DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
134 }
135
136 template<typename HalVersion>
execute(const V1_0::Request & request,const::android::sp<V1_0::IExecutionCallback> & callback)137 Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
138 const V1_0::Request& request,
139 const ::android::sp<V1_0::IExecutionCallback>& callback)
140 {
141 ALOGV("ArmnnPreparedModel::execute(): %s", GetModelSummary(m_Model).c_str());
142 m_RequestCount++;
143
144 if (callback.get() == nullptr) {
145 ALOGE("ArmnnPreparedModel::execute invalid callback passed");
146 return V1_0::ErrorStatus::INVALID_ARGUMENT;
147 }
148
149 if (!android::nn::validateRequest(request, m_Model))
150 {
151 NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel::execute");
152 return V1_0::ErrorStatus::INVALID_ARGUMENT;
153 }
154
155 if (!m_RequestInputsAndOutputsDumpDir.empty())
156 {
157 ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get()));
158 }
159
160 // allocate the tensors on the heap, as they are passed to the request thread
161 auto pInputTensors = std::make_shared<armnn::InputTensors>();
162 auto pOutputTensors = std::make_shared<armnn::OutputTensors>();
163
164 // map the memory pool into shared pointers
165 // use a shared memory pools vector on the heap, as it is passed to the request thread
166 auto pMemPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
167 if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
168 {
169 NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
170 return V1_0::ErrorStatus::GENERAL_FAILURE;
171 }
172
173 // add the inputs and outputs with their data
174 try
175 {
176 pInputTensors->reserve(request.inputs.size());
177 for (unsigned int i = 0; i < request.inputs.size(); i++)
178 {
179 const auto& inputArg = request.inputs[i];
180
181 const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
182 const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);
183
184 uint32_t poolIndex = inputArg.location.poolIndex;
185 if (poolIndex >= pMemPools->size())
186 {
187 ALOGE("Cannot execute request. Error converting request input %u to tensor: wrong poolIndex", i);
188 return V1_0::ErrorStatus::GENERAL_FAILURE;
189 }
190
191 uint8_t* inputTensorBegin = static_cast<uint8_t*>(inputTensor.GetMemoryArea());
192 if (inputTensorBegin == nullptr)
193 {
194 ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
195 return V1_0::ErrorStatus::GENERAL_FAILURE;
196 }
197
198 const size_t inputTensorSize = inputTensorInfo.GetNumBytes();
199 uint8_t* memoryPoolBegin = (*pMemPools)[poolIndex].getBuffer();
200 uint32_t memoryPoolSize = (*pMemPools)[poolIndex].getSize();
201 bool inputTensorIsOutOfMemoryRage = (inputTensorBegin + inputTensorSize) > (memoryPoolBegin + memoryPoolSize);
202
203 if (inputTensorIsOutOfMemoryRage)
204 {
205 ALOGE("Cannot execute request. Error converting request input %u to tensor: out of Memory Pool", i);
206 return V1_0::ErrorStatus::GENERAL_FAILURE;
207 }
208
209 pInputTensors->emplace_back(i, inputTensor);
210 }
211
212 pOutputTensors->reserve(request.outputs.size());
213 for (unsigned int i = 0; i < request.outputs.size(); i++)
214 {
215 const auto& outputArg = request.outputs[i];
216
217 const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
218 const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);
219
220 uint32_t poolIndex = outputArg.location.poolIndex;
221 if (poolIndex >= pMemPools->size())
222 {
223 ALOGE("Cannot execute request. Error converting request output %u to tensor: wrong poolIndex", i);
224 return V1_0::ErrorStatus::GENERAL_FAILURE;
225 }
226
227 uint8_t* outputTensorBegin = static_cast<uint8_t*>(outputTensor.GetMemoryArea());
228 if (outputTensorBegin == nullptr)
229 {
230 ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
231 return V1_0::ErrorStatus::GENERAL_FAILURE;
232 }
233
234 const size_t outputTensorSize = outputTensorInfo.GetNumBytes();
235 uint8_t* memoryPoolBegin = (*pMemPools)[poolIndex].getBuffer();
236 uint32_t memoryPoolSize = (*pMemPools)[poolIndex].getSize();
237 bool outputTensorIsOutOfMemoryRage = (outputTensorBegin + outputTensorSize) > (memoryPoolBegin + memoryPoolSize);
238
239 if (outputTensorIsOutOfMemoryRage)
240 {
241 ALOGE("Cannot execute request. Error converting request output %u to tensor: out of Memory Pool", i);
242 return V1_0::ErrorStatus::GENERAL_FAILURE;
243 }
244
245 pOutputTensors->emplace_back(i, outputTensor);
246 }
247 }
248 catch (armnn::Exception& e)
249 {
250 ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what());
251 NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
252 return V1_0::ErrorStatus::GENERAL_FAILURE;
253 }
254 catch (std::exception& e)
255 {
256 ALOGE("std::exception caught while preparing for EnqueueWorkload: %s", e.what());
257 NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
258 return V1_0::ErrorStatus::GENERAL_FAILURE;
259 }
260
261 ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
262
263 auto cb = [callback](V1_0::ErrorStatus errorStatus, std::string callingFunction)
264 {
265 NotifyCallbackAndCheck(callback, errorStatus, callingFunction);
266 };
267
268 CallbackContext_1_0 armnnCb;
269 armnnCb.callback = cb;
270 // post the request for asynchronous execution
271 m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb);
272 ALOGV("ArmnnPreparedModel::execute(...) after PostMsg");
273 return V1_0::ErrorStatus::NONE; // successfully queued
274 }
275
276 template<typename HalVersion>
ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> & pMemPools,armnn::InputTensors & inputTensors,armnn::OutputTensors & outputTensors,CallbackContext_1_0 cb)277 void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
278 std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
279 armnn::InputTensors& inputTensors,
280 armnn::OutputTensors& outputTensors,
281 CallbackContext_1_0 cb)
282 {
283 ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");
284
285 DumpTensorsIfRequired("Input", inputTensors);
286
287 // run it
288 try
289 {
290 armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
291 if (status != armnn::Status::Success)
292 {
293 ALOGW("EnqueueWorkload failed");
294 cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
295 return;
296 }
297 }
298 catch (armnn::Exception& e)
299 {
300 ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
301 cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
302 return;
303 }
304 catch (std::exception& e)
305 {
306 ALOGE("std::exception caught from EnqueueWorkload: %s", e.what());
307 cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
308 return;
309 }
310
311 DumpTensorsIfRequired("Output", outputTensors);
312
313 // Commit output buffers.
314 // Note that we update *all* pools, even if they aren't actually used as outputs -
315 // this is simpler and is what the CpuExecutor does.
316 for (android::nn::RunTimePoolInfo& pool : *pMemPools)
317 {
318 // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
319 // update() has been removed and flush() added.
320 #if defined(ARMNN_ANDROID_R) // Use the new Android implementation.
321 pool.flush();
322 #else
323 pool.update();
324 #endif
325 }
326
327 cb.callback(V1_0::ErrorStatus::NONE, "ExecuteGraph");
328 }
329
330 template<typename HalVersion>
ExecuteWithDummyInputs()331 bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
332 {
333 std::vector<std::vector<char>> storage;
334 armnn::InputTensors inputTensors;
335 for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
336 {
337 const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
338 storage.emplace_back(inputTensorInfo.GetNumBytes());
339 const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());
340
341 inputTensors.emplace_back(i, inputTensor);
342 }
343
344 armnn::OutputTensors outputTensors;
345 for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
346 {
347 const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
348 storage.emplace_back(outputTensorInfo.GetNumBytes());
349 const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());
350
351 outputTensors.emplace_back(i, outputTensor);
352 }
353
354 try
355 {
356 armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
357 if (status != armnn::Status::Success)
358 {
359 ALOGW("ExecuteWithDummyInputs: EnqueueWorkload failed");
360 return false;
361 }
362 }
363 catch (armnn::Exception& e)
364 {
365 ALOGW("ExecuteWithDummyInputs: armnn::Exception caught from EnqueueWorkload: %s", e.what());
366 return false;
367 }
368 catch (std::exception& e)
369 {
370 ALOGE("ExecuteWithDummyInputs: std::exception caught from EnqueueWorkload: %s", e.what());
371 return false;
372 }
373 return true;
374 }
375
376 ///
377 /// Class template specializations
378 ///
379
380 template class ArmnnPreparedModel<hal_1_0::HalPolicy>;
381
382 #ifdef ARMNN_ANDROID_NN_V1_1
383 template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
384 #endif
385
386 #ifdef ARMNN_ANDROID_NN_V1_2
387 template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
388 template class ArmnnPreparedModel<hal_1_2::HalPolicy>;
389 #endif
390
391 #ifdef ARMNN_ANDROID_NN_V1_3
392 template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
393 template class ArmnnPreparedModel<hal_1_2::HalPolicy>;
394 template class ArmnnPreparedModel<hal_1_3::HalPolicy>;
395 #endif
396 } // namespace armnn_driver
397