//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "../ArmnnPreparedModel_1_3.hpp"
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"

#include <log/log.h>

#include <cfloat>  // FLT_MAX
#include <sstream>
#include <utility> // std::move

namespace
{
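// Names of the Android system properties that can override the performance numbers
// reported by getCapabilities_1_3. Each property holds a float; if a property is
// unset or cannot be parsed, ParseSystemProperty falls back to the supplied default.
// For example (assuming the standard Android property mechanism and a permissive
// build), a value could be set with:
//   adb shell setprop ArmNN.ifPerformance.execTime 0.5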
const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char *g_RelaxedFloat32toFloat16PerformancePowerUsage = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

const char *g_ifPerformanceExecTime = "ArmNN.ifPerformance.execTime";
const char *g_ifPerformancePowerUsage = "ArmNN.ifPerformance.powerUsage";

const char *g_whilePerformanceExecTime = "ArmNN.whilePerformance.execTime";
const char *g_whilePerformancePowerUsage = "ArmNN.whilePerformance.powerUsage";

const char *g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime";
const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

const char *g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime";
const char *g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage";

const char *g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime";
const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

const char *g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime";
const char *g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime";
const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage";

const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime";
const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage";
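
// Forwards the preparation result to the HIDL callback, logging (rather than
// crashing the service) if the HIDL transport reports an error.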
void NotifyCallbackAndCheck(const sp<V1_3::IPreparedModelCallback>& callback,
                            V1_3::ErrorStatus errorStatus,
                            const sp<V1_3::IPreparedModel>& preparedModelPtr)
{
    Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
    // This check is required; if the callback fails and the error is not checked, it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s",
              returned.description().c_str());
    }
}
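
// Logs the failure, notifies the callback with a null prepared model, and returns
// the error status so call sites can simply `return FailPrepareModel(...)`.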
Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
                                           const std::string& message,
                                           const sp<V1_3::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}

} // anonymous namespace

namespace armnn_driver
{
namespace hal_1_3
{

Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const V1_3::Model& model,
    const sp<V1_3::IPreparedModelCallback>& cb,
    bool float32ToFloat16,
    V1_3::Priority priority)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options:
    // at this point we are being asked to prepare a model that we have already declared support for,
    // and the operation indices may be different from those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
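        // The failure has already been reported through the callback; per the NNAPI
        // prepareModel contract, the synchronous status only indicates whether the
        // preparation task was launched, hence NONE here.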
        return V1_3::ErrorStatus::NONE;
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
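    // When the model requests relaxed precision, allow the optimizer to run FP32 layers in FP16.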
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
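
    // Forward the driver's fast-math setting to both accelerated backends. Fast math
    // permits faster kernel implementations (for example Winograd convolutions) at the
    // cost of bit-exact results.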
    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() }
    });
    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (const std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
        }
    }
    catch (const std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Now that we have a networkId for the graph, rename the dump file to use it
    // so that we can associate the graph file with the input/output tensor dump files.
    RenameGraphDotFile(dotGraphFileName,
                       options.GetRequestInputsAndOutputsDumpDir(),
                       netId);

    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
        new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
            netId,
            runtime.get(),
            model,
            options.GetRequestInputsAndOutputsDumpDir(),
            options.IsGpuProfilingEnabled(),
            priority));

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference, which removes the overhead of the first inference.
    if (!preparedModel->ExecuteWithDummyInputs())
    {
        return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
    }

    if (clTunedParameters &&
        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
    {
        // Now that we've done one inference, the CL kernel parameters will have been tuned, so save the updated file.
        try
        {
            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
        }
        catch (const std::exception& error)
        {
            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                  options.GetClTunedParametersFile().c_str(), error.what());
        }
    }
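
    // preparedModel.release() hands ownership to the callback: notify_1_3 wraps the raw
    // pointer in a strong pointer, which keeps the prepared model alive for the client.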
    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    return V1_3::ErrorStatus::NONE;
}

Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
                                                  V1_3::IDevice::getCapabilities_1_3_cb cb)
{
    ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()");

    V1_3::Capabilities capabilities;
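
    // Default reported for any performance property that is unset or cannot be parsed.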
    float defaultValue = 0.1f;

    if (runtime)
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
            ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
            ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
            ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
            ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.ifPerformance.execTime =
            ParseSystemProperty(g_ifPerformanceExecTime, defaultValue);

        capabilities.ifPerformance.powerUsage =
            ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue);

        capabilities.whilePerformance.execTime =
            ParseSystemProperty(g_whilePerformanceExecTime, defaultValue);

        capabilities.whilePerformance.powerUsage =
            ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue);

        // Set the base value for all operand types; types not overridden below are
        // reported as FLT_MAX, i.e. with no useful performance data.
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});

        // Load the performance data for each supported operand type
        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
                                                   defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,
                                                     defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
               {
                   .execTime =
                       ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
                   .powerUsage =
                       ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
               });

        update(&capabilities.operandPerformance, V1_3::OperandType::INT32,
               {
                   .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
                   .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
               });

        cb(V1_3::ErrorStatus::NONE, capabilities);
    }
    else
    {
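        // No runtime is available: report zeroed performance data and
        // DEVICE_UNAVAILABLE so the framework knows this driver cannot run anything.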
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;
        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
        capabilities.ifPerformance.execTime = 0;
        capabilities.ifPerformance.powerUsage = 0;
        capabilities.whilePerformance.execTime = 0;
        capabilities.whilePerformance.powerUsage = 0;

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.0f, 0.0f});

        cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
    }

    return Void();
}

} // namespace hal_1_3
} // namespace armnn_driver