//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#define LOG_TAG "ArmnnDriver"

#include "ArmnnPreparedModel.hpp"
#include "Utils.hpp"

#include <log/log.h>
#include <OperationsUtils.h>
#include <ValidateHal.h>

#include <cassert>
#include <cinttypes>

using namespace android;

namespace
{
using namespace armnn_driver;

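// Invokes the HIDL execution callback with the given error status, and logs an error
// if the transport-level call itself fails.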
void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callback, V1_0::ErrorStatus errorStatus,
                            std::string callingFunction)
{
    Return<void> returned = callback->notify(errorStatus);
    // This check is required; if the callback fails and isn't checked, it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriver::%s: hidl callback failed to return properly: %s",
            callingFunction.c_str(), returned.description().c_str());
    }
}

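// Checks that the dimensions supplied in the request argument are consistent with
// the tensor info the network expects for that binding.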
bool ValidateRequestArgument(const V1_0::RequestArgument& requestArg, const armnn::TensorInfo& tensorInfo)
{
    if (requestArg.dimensions.size() != 0)
    {
        if (requestArg.dimensions.size() != tensorInfo.GetNumDimensions())
        {
            ALOGE("Mismatched dimensions (request argument: %zu, expected: %u)",
                  requestArg.dimensions.size(), tensorInfo.GetNumDimensions());
            return false;
        }

        for (unsigned int d = 0; d < tensorInfo.GetNumDimensions(); ++d)
        {
            if (requestArg.dimensions[d] != 0 && requestArg.dimensions[d] != tensorInfo.GetShape()[d])
            {
                ALOGE("Mismatched size for dimension %u (request argument: %u, expected %u)",
                    d, requestArg.dimensions[d], tensorInfo.GetShape()[d]);
                return false;
            }
        }
    }

    return true;
}

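// Builds an armnn::Tensor that points into the request's memory pools; returns an
// empty Tensor if the request argument fails validation.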
armnn::Tensor GetTensorForRequestArgument(const V1_0::RequestArgument& requestArg,
    const armnn::TensorInfo& tensorInfo,
    const std::vector<::android::nn::RunTimePoolInfo>& requestPools)
{
    if (!ValidateRequestArgument(requestArg, tensorInfo))
    {
        return armnn::Tensor();
    }

    return armnn::Tensor(tensorInfo, GetMemoryFromPool(requestArg.location, requestPools));
}

inline std::string BuildTensorName(const char* tensorNamePrefix, std::size_t index)
{
    return tensorNamePrefix + std::to_string(index);
}

} // anonymous namespace

using namespace android::hardware;

namespace armnn_driver
{
template<typename HalVersion>
RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0>
    ArmnnPreparedModel<HalVersion>::m_RequestThread;

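// Writes each tensor in the binding collection to a per-request dump file when a
// dump directory has been configured.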
template<typename HalVersion>
template <typename TensorBindingCollection>
void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
                                                           const TensorBindingCollection& tensorBindings)
{
    if (!m_RequestInputsAndOutputsDumpDir.empty())
    {
        const std::string requestName = std::to_string(m_NetworkId) + "_" + std::to_string(m_RequestCount) + ".dump";
        for (std::size_t i = 0u; i < tensorBindings.size(); ++i)
        {
            DumpTensor(m_RequestInputsAndOutputsDumpDir,
                requestName,
                BuildTensorName(tensorNamePrefix, i),
                tensorBindings[i].second);
        }
    }
}

template<typename HalVersion>
ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
                                                   armnn::IRuntime* runtime,
                                                   const HalModel& model,
                                                   const std::string& requestInputsAndOutputsDumpDir,
                                                   const bool gpuProfilingEnabled)
    : m_NetworkId(networkId)
    , m_Runtime(runtime)
    , m_Model(model)
    , m_RequestCount(0)
    , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
    , m_GpuProfilingEnabled(gpuProfilingEnabled)
{
    // Enable profiling if required.
    m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
}

template<typename HalVersion>
ArmnnPreparedModel<HalVersion>::~ArmnnPreparedModel()
{
    // Get a hold of the profiler used by this model.
    std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

    // Unload the network associated with this model.
    m_Runtime->UnloadNetwork(m_NetworkId);

    // Dump the profiling info to a file if required.
    DumpJsonProfilingIfRequired(m_GpuProfilingEnabled, m_RequestInputsAndOutputsDumpDir, m_NetworkId, profiler.get());
}

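// Validates the request, maps its memory pools, wraps the inputs and outputs in
// armnn tensors, and posts the work to the request thread for asynchronous execution.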
template<typename HalVersion>
Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
    const V1_0::Request& request,
    const ::android::sp<V1_0::IExecutionCallback>& callback)
{
    ALOGV("ArmnnPreparedModel::execute(): %s", GetModelSummary(m_Model).c_str());
    m_RequestCount++;

    if (callback.get() == nullptr)
    {
        ALOGE("ArmnnPreparedModel::execute invalid callback passed");
        return V1_0::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!android::nn::validateRequest(request, m_Model))
    {
        NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel::execute");
        return V1_0::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!m_RequestInputsAndOutputsDumpDir.empty())
    {
        ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get()));
    }

    // allocate the tensors on the heap, as they are passed to the request thread
    auto pInputTensors = std::make_shared<armnn::InputTensors>();
    auto pOutputTensors = std::make_shared<armnn::OutputTensors>();

    // map the memory pool into shared pointers
    // use a shared memory pools vector on the heap, as it is passed to the request thread
    auto pMemPools = std::make_shared<std::vector<android::nn::RunTimePoolInfo>>();
    if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools))
    {
        NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    // add the inputs and outputs with their data
    try
    {
        pInputTensors->reserve(request.inputs.size());
        for (unsigned int i = 0; i < request.inputs.size(); i++)
        {
            const auto& inputArg = request.inputs[i];

            const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
            const armnn::Tensor inputTensor = GetTensorForRequestArgument(inputArg, inputTensorInfo, *pMemPools);

            uint32_t poolIndex = inputArg.location.poolIndex;
            if (poolIndex >= pMemPools->size())
            {
                ALOGE("Cannot execute request. Error converting request input %u to tensor: wrong poolIndex", i);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            }

            uint8_t* inputTensorBegin = static_cast<uint8_t*>(inputTensor.GetMemoryArea());
            if (inputTensorBegin == nullptr)
            {
                ALOGE("Cannot execute request. Error converting request input %u to tensor", i);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            }

            // Check that the input tensor data lies entirely within its memory pool.
            const size_t inputTensorSize = inputTensorInfo.GetNumBytes();
            uint8_t* memoryPoolBegin = (*pMemPools)[poolIndex].getBuffer();
            uint32_t memoryPoolSize = (*pMemPools)[poolIndex].getSize();
            bool inputTensorIsOutOfMemoryRange = (inputTensorBegin + inputTensorSize) > (memoryPoolBegin + memoryPoolSize);

            if (inputTensorIsOutOfMemoryRange)
            {
                ALOGE("Cannot execute request. Error converting request input %u to tensor: out of Memory Pool", i);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            }

            pInputTensors->emplace_back(i, inputTensor);
        }

        pOutputTensors->reserve(request.outputs.size());
        for (unsigned int i = 0; i < request.outputs.size(); i++)
        {
            const auto& outputArg = request.outputs[i];

            const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
            const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools);

            uint32_t poolIndex = outputArg.location.poolIndex;
            if (poolIndex >= pMemPools->size())
            {
                ALOGE("Cannot execute request. Error converting request output %u to tensor: wrong poolIndex", i);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            }

            uint8_t* outputTensorBegin = static_cast<uint8_t*>(outputTensor.GetMemoryArea());
            if (outputTensorBegin == nullptr)
            {
                ALOGE("Cannot execute request. Error converting request output %u to tensor", i);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            }

            // Check that the output tensor data lies entirely within its memory pool.
            const size_t outputTensorSize = outputTensorInfo.GetNumBytes();
            uint8_t* memoryPoolBegin = (*pMemPools)[poolIndex].getBuffer();
            uint32_t memoryPoolSize = (*pMemPools)[poolIndex].getSize();
            bool outputTensorIsOutOfMemoryRange = (outputTensorBegin + outputTensorSize) > (memoryPoolBegin + memoryPoolSize);

            if (outputTensorIsOutOfMemoryRange)
            {
                ALOGE("Cannot execute request. Error converting request output %u to tensor: out of Memory Pool", i);
                return V1_0::ErrorStatus::GENERAL_FAILURE;
            }

            pOutputTensors->emplace_back(i, outputTensor);
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what());
        NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }
    catch (std::exception& e)
    {
        ALOGE("std::exception caught while preparing for EnqueueWorkload: %s", e.what());
        NotifyCallbackAndCheck(callback, V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::execute");
        return V1_0::ErrorStatus::GENERAL_FAILURE;
    }

    ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");

    auto cb = [callback](V1_0::ErrorStatus errorStatus, std::string callingFunction)
    {
        NotifyCallbackAndCheck(callback, errorStatus, callingFunction);
    };

    CallbackContext_1_0 armnnCb;
    armnnCb.callback = cb;
    // post the request for asynchronous execution
    m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb);
    ALOGV("ArmnnPreparedModel::execute(...) after PostMsg");
    return V1_0::ErrorStatus::NONE; // successfully queued
}

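// Runs the loaded network on the prepared input and output tensors; invoked on the
// request thread, and reports the result through the callback context.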
template<typename HalVersion>
void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
        std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
        armnn::InputTensors& inputTensors,
        armnn::OutputTensors& outputTensors,
        CallbackContext_1_0 cb)
{
    ALOGV("ArmnnPreparedModel::ExecuteGraph(...)");

    DumpTensorsIfRequired("Input", inputTensors);

    // run it
    try
    {
        armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
        if (status != armnn::Status::Success)
        {
            ALOGW("EnqueueWorkload failed");
            cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
            return;
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what());
        cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
        return;
    }
    catch (std::exception& e)
    {
        ALOGE("std::exception caught from EnqueueWorkload: %s", e.what());
        cb.callback(V1_0::ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph");
        return;
    }

    DumpTensorsIfRequired("Output", outputTensors);

    // Commit output buffers.
    // Note that we update *all* pools, even if they aren't actually used as outputs -
    // this is simpler and is what the CpuExecutor does.
    for (android::nn::RunTimePoolInfo& pool : *pMemPools)
    {
        // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
        // update() has been removed and flush() added.
        #if defined(ARMNN_ANDROID_R) // Use the new Android implementation.
            pool.flush();
        #else
            pool.update();
        #endif
    }

    cb.callback(V1_0::ErrorStatus::NONE, "ExecuteGraph");
}

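// Runs a single inference using zero-filled dummy buffers for every input and output;
// returns false if EnqueueWorkload fails or throws.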
template<typename HalVersion>
bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
{
    std::vector<std::vector<char>> storage;
    armnn::InputTensors inputTensors;
    for (unsigned int i = 0; i < getMainModel(m_Model).inputIndexes.size(); i++)
    {
        const armnn::TensorInfo inputTensorInfo = m_Runtime->GetInputTensorInfo(m_NetworkId, i);
        storage.emplace_back(inputTensorInfo.GetNumBytes());
        const armnn::ConstTensor inputTensor(inputTensorInfo, storage.back().data());

        inputTensors.emplace_back(i, inputTensor);
    }

    armnn::OutputTensors outputTensors;
    for (unsigned int i = 0; i < getMainModel(m_Model).outputIndexes.size(); i++)
    {
        const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i);
        storage.emplace_back(outputTensorInfo.GetNumBytes());
        const armnn::Tensor outputTensor(outputTensorInfo, storage.back().data());

        outputTensors.emplace_back(i, outputTensor);
    }

    try
    {
        armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors);
        if (status != armnn::Status::Success)
        {
            ALOGW("ExecuteWithDummyInputs: EnqueueWorkload failed");
            return false;
        }
    }
    catch (armnn::Exception& e)
    {
        ALOGW("ExecuteWithDummyInputs: armnn::Exception caught from EnqueueWorkload: %s", e.what());
        return false;
    }
    catch (std::exception& e)
    {
        ALOGE("ExecuteWithDummyInputs: std::exception caught from EnqueueWorkload: %s", e.what());
        return false;
    }
    return true;
}

///
/// Class template specializations
///

template class ArmnnPreparedModel<hal_1_0::HalPolicy>;

#ifdef ARMNN_ANDROID_NN_V1_1
template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
#endif

#ifdef ARMNN_ANDROID_NN_V1_2
template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
template class ArmnnPreparedModel<hal_1_2::HalPolicy>;
#endif

#ifdef ARMNN_ANDROID_NN_V1_3
template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
template class ArmnnPreparedModel<hal_1_2::HalPolicy>;
template class ArmnnPreparedModel<hal_1_3::HalPolicy>;
#endif
} // namespace armnn_driver