//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "../ArmnnPreparedModel_1_2.hpp"
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"

#include <log/log.h>

#include <cfloat>  // FLT_MAX, used in getCapabilities_1_2
#include <sstream> // std::stringstream, used for error messages

namespace
{

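// Android system property keys used to report per-operand-type performance estimates in
// getCapabilities_1_2(). Each key can be overridden on a device, for example (hypothetical value):
//   adb shell setprop Armnn.operandTypeTensorFloat32Performance.execTime 10.0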
const char *g_RelaxedFloat32toFloat16PerformanceExecTime = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char *g_RelaxedFloat32toFloat16PerformancePowerUsage = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

const char *g_OperandTypeTensorFloat32PerformanceExecTime = "Armnn.operandTypeTensorFloat32Performance.execTime";
const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

const char *g_OperandTypeFloat32PerformanceExecTime = "Armnn.operandTypeFloat32Performance.execTime";
const char *g_OperandTypeFloat32PerformancePowerUsage = "Armnn.operandTypeFloat32Performance.powerUsage";

const char *g_OperandTypeTensorFloat16PerformanceExecTime = "Armnn.operandTypeTensorFloat16Performance.execTime";
const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

const char *g_OperandTypeFloat16PerformanceExecTime = "Armnn.operandTypeFloat16Performance.execTime";
const char *g_OperandTypeFloat16PerformancePowerUsage = "Armnn.operandTypeFloat16Performance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

const char *g_OperandTypeTensorInt32PerformanceExecTime = "Armnn.operandTypeTensorInt32Performance.execTime";
const char *g_OperandTypeTensorInt32PerformancePowerUsage = "Armnn.operandTypeTensorInt32Performance.powerUsage";

const char *g_OperandTypeInt32PerformanceExecTime = "Armnn.operandTypeInt32Performance.execTime";
const char *g_OperandTypeInt32PerformancePowerUsage = "Armnn.operandTypeInt32Performance.powerUsage";

void NotifyCallbackAndCheck(const sp<V1_2::IPreparedModelCallback>& callback,
                            V1_0::ErrorStatus errorStatus,
                            const sp<V1_2::IPreparedModel>& preparedModelPtr)
{
    Return<void> returned = callback->notify_1_2(errorStatus, preparedModelPtr);
    // This check is required; if the callback fails and the result isn't checked, it will bring down the service.
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
              returned.description().c_str());
    }
}

Return<V1_0::ErrorStatus> FailPrepareModel(V1_0::ErrorStatus error,
                                           const std::string& message,
                                           const sp<V1_2::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}

} // anonymous namespace

namespace armnn_driver
{
namespace hal_1_2
{

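// Prepares a V1_2 model for execution: converts the HAL model to an ArmNN INetwork, optimizes
// it for the requested backends, loads it into the runtime, warms it up with a dummy inference,
// and returns the resulting ArmnnPreparedModel_1_2 through the supplied callback.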
Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
    const armnn::IRuntimePtr& runtime,
    const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
    const DriverOptions& options,
    const V1_2::Model& model,
    const sp<V1_2::IPreparedModelCallback>& cb,
    bool float32ToFloat16)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_2()");

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return V1_0::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
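        // Note: the failure is delivered to the client through the callback; the NONE returned
        // below indicates only that the prepare call itself was received and dispatched.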
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
        return V1_0::ErrorStatus::NONE;
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;

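    // "FastMathEnabled" lets the GpuAcc/CpuAcc backends select faster kernels (e.g. Winograd
    // convolutions) that may trade a small amount of numerical precision for speed.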
    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() }
    });
    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_0::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_0::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_0::ErrorStatus::NONE;
    }

    // Now that we have a networkId for the graph, rename the dump file to use it,
    // so that we can associate the graph file with the input/output tensor dump files.
    RenameGraphDotFile(dotGraphFileName,
                       options.GetRequestInputsAndOutputsDumpDir(),
                       netId);

    std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
                    netId,
                    runtime.get(),
                    model,
                    options.GetRequestInputsAndOutputsDumpDir(),
                    options.IsGpuProfilingEnabled()));

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
    // this is enabled) before the first 'real' inference, which removes the overhead of the first inference.
    if (!preparedModel->ExecuteWithDummyInputs())
    {
        return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
    }

    if (clTunedParameters &&
        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
    {
        // Now that we've done one inference the CL kernel parameters will have been tuned,
        // so save the updated file.
        try
        {
            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
        }
        catch (std::exception& error)
        {
            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                  options.GetClTunedParametersFile().c_str(), error.what());
        }
    }

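    // Ownership of the prepared model passes to the strong pointer constructed for the callback;
    // release() hands over the raw pointer without destroying the object.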
    NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());

    return V1_0::ErrorStatus::NONE;
}

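// Reports the driver's capabilities. The per-operand-type performance numbers are read from the
// Android system properties declared above, falling back to a default when a property is unset,
// so they can be tuned per device without rebuilding the driver.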
Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runtime,
                                                  V1_2::IDevice::getCapabilities_1_2_cb cb)
{
    ALOGV("hal_1_2::ArmnnDriverImpl::getCapabilities_1_2()");

    V1_2::Capabilities capabilities;

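    // Fallback value used when a system property is unset or cannot be parsed.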
    float defaultValue = .1f;

    if (runtime)
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

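        // FLT_MAX is effectively "worst possible performance", steering the NNAPI runtime away
        // from any operand type that is not explicitly overridden below.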
        // Set the base value for all operand types
#ifdef ARMNN_ANDROID_R
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({FLT_MAX, FLT_MAX});
#else
        capabilities.operandPerformance = nonExtensionOperandPerformance({FLT_MAX, FLT_MAX});
#endif

        // Load supported operand types
        update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_FLOAT32,
        {
            .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
            .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
        });

        update(&capabilities.operandPerformance, V1_2::OperandType::FLOAT32,
        {
            .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
            .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
        });

        update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_FLOAT16,
        {
            .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
            .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
        });

        update(&capabilities.operandPerformance, V1_2::OperandType::FLOAT16,
        {
            .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
            .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
        });

        update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_ASYMM,
        {
            .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
            .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
        });

        update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_SYMM,
        {
            .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
            .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
        });

        update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT16_SYMM,
        {
            .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
            .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
        });

        update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
        {
            .execTime =
                ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
            .powerUsage =
                ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
        });

        update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_INT32,
        {
            .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
            .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
        });

        update(&capabilities.operandPerformance, V1_2::OperandType::INT32,
        {
            .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
            .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
        });

        cb(V1_0::ErrorStatus::NONE, capabilities);
    }
    else
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime = 0;
        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;

        // Set the base value for all operand types
#ifdef ARMNN_ANDROID_R
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({0.0f, 0.0f});
#else
        capabilities.operandPerformance = nonExtensionOperandPerformance({0.0f, 0.0f});
#endif

        cb(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
    }

    return Void();
}

} // namespace hal_1_2
} // namespace armnn_driver