//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ArmnnDriverImpl.hpp"
#include "../ArmnnPreparedModel_1_3.hpp"
#include "../ModelToINetworkConverter.hpp"
#include "../SystemPropertiesUtils.hpp"

#include <log/log.h>

namespace
{
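// Android system property keys that let a device override the performance numbers this
// driver reports from getCapabilities_1_3(); ParseSystemProperty() returns the supplied
// default when a key is unset. For example (the value here is purely illustrative):
//   adb shell setprop ArmNN.relaxedFloat32toFloat16Performance.execTime 0.5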
const char *g_RelaxedFloat32toFloat16PerformanceExecTime    = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char *g_RelaxedFloat32toFloat16PerformancePowerUsage  = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

const char *g_ifPerformanceExecTime                         = "ArmNN.ifPerformance.execTime";
const char *g_ifPerformancePowerUsage                       = "ArmNN.ifPerformance.powerUsage";

const char *g_whilePerformanceExecTime                      = "ArmNN.whilePerformance.execTime";
const char *g_whilePerformancePowerUsage                    = "ArmNN.whilePerformance.powerUsage";

const char *g_OperandTypeTensorFloat32PerformanceExecTime   = "Armnn.operandTypeTensorFloat32Performance.execTime";
const char *g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

const char *g_OperandTypeFloat32PerformanceExecTime         = "Armnn.operandTypeFloat32Performance.execTime";
const char *g_OperandTypeFloat32PerformancePowerUsage       = "Armnn.operandTypeFloat32Performance.powerUsage";

const char *g_OperandTypeTensorFloat16PerformanceExecTime   = "Armnn.operandTypeTensorFloat16Performance.execTime";
const char *g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

const char *g_OperandTypeFloat16PerformanceExecTime         = "Armnn.operandTypeFloat16Performance.execTime";
const char *g_OperandTypeFloat16PerformancePowerUsage       = "Armnn.operandTypeFloat16Performance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.execTime";
const char *g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmSignedPerformance.powerUsage";

const char *g_OperandTypeTensorQuant16SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

const char *g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
const char *g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";

const char *g_OperandTypeTensorInt32PerformanceExecTime     = "Armnn.operandTypeTensorInt32Performance.execTime";
const char *g_OperandTypeTensorInt32PerformancePowerUsage   = "Armnn.operandTypeTensorInt32Performance.powerUsage";

const char *g_OperandTypeInt32PerformanceExecTime           = "Armnn.operandTypeInt32Performance.execTime";
const char *g_OperandTypeInt32PerformancePowerUsage         = "Armnn.operandTypeInt32Performance.powerUsage";

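// Forwards the preparation result to the client's callback and logs an error if the
// HIDL call does not return successfully.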
void NotifyCallbackAndCheck(const sp<V1_3::IPreparedModelCallback>& callback,
                            V1_3::ErrorStatus errorStatus,
                            const sp<V1_3::IPreparedModel>& preparedModelPtr)
{
    Return<void> returned = callback->notify_1_3(errorStatus, preparedModelPtr);
    // This check is required; if the callback fails and isn't checked, it will bring down the service
    if (!returned.isOk())
    {
        ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
              returned.description().c_str());
    }
}

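// Logs the failure, reports it through the callback, and returns the error for callers
// that propagate it directly.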
Return<V1_3::ErrorStatus> FailPrepareModel(V1_3::ErrorStatus error,
                                           const std::string& message,
                                           const sp<V1_3::IPreparedModelCallback>& callback)
{
    ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
    NotifyCallbackAndCheck(callback, error, nullptr);
    return error;
}

} // anonymous namespace

namespace armnn_driver
{
namespace hal_1_3
{

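// Converts the HAL model to an armnn::INetwork, optimizes it for the requested backends,
// loads it into the runtime and, on success, hands an ArmnnPreparedModel_1_3 back through
// the callback.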
Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
       const armnn::IRuntimePtr& runtime,
       const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
       const DriverOptions& options,
       const V1_3::Model& model,
       const sp<V1_3::IPreparedModelCallback>& cb,
       bool float32ToFloat16,
       V1_3::Priority priority)
{
    ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");

    if (cb.get() == nullptr)
    {
        ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
        return V1_3::ErrorStatus::INVALID_ARGUMENT;
    }

    if (!runtime)
    {
        return FailPrepareModel(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
    }

    if (!android::nn::validateModel(model))
    {
        return FailPrepareModel(V1_3::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
    }

    // Deliberately ignore any unsupported operations requested by the options -
    // at this point we're being asked to prepare a model that we've already declared support for
    // and the operation indices may be different to those in getSupportedOperations anyway.
    std::set<unsigned int> unsupportedOperations;
    ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
                                                       model,
                                                       unsupportedOperations);

    if (modelConverter.GetConversionResult() != ConversionResult::Success)
    {
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
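        // The failure has already been delivered through the callback above; NONE here
        // only means the prepare request itself was accepted and handled.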
        return V1_3::ErrorStatus::NONE;
    }

    // Optimize the network
    armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
    armnn::OptimizerOptions OptOptions;
    OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;

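    // FastMathEnabled lets the GpuAcc and CpuAcc backends choose faster arithmetic paths
    // that may trade some numerical accuracy for speed.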
    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() }
    });
    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", options.IsFastMathEnabled() }
    });
    OptOptions.m_ModelOptions.push_back(gpuAcc);
    OptOptions.m_ModelOptions.push_back(cpuAcc);

    std::vector<std::string> errMessages;
    try
    {
        optNet = armnn::Optimize(*modelConverter.GetINetwork(),
                                 options.GetBackends(),
                                 runtime->GetDeviceSpec(),
                                 OptOptions,
                                 errMessages);
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from optimize.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Check that the optimized network is valid.
    if (!optNet)
    {
        std::stringstream message;
        message << "Invalid optimized network";
        for (const std::string& msg : errMessages)
        {
            message << "\n" << msg;
        }
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Export the optimized network graph to a dot file if an output dump directory
    // has been specified in the driver's arguments.
    std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
                                                               options.GetRequestInputsAndOutputsDumpDir());

    // Load it into the runtime.
    armnn::NetworkId netId = 0;
    try
    {
        if (runtime->LoadNetwork(netId, std::move(optNet)) != armnn::Status::Success)
        {
            return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
        }
    }
    catch (std::exception& e)
    {
        std::stringstream message;
        message << "Exception (" << e.what() << ") caught from LoadNetwork.";
        FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
        return V1_3::ErrorStatus::NONE;
    }

    // Now that we have a networkId for the graph, rename the dump file to use it
    // so that we can associate the graph file and the input/output tensor dump files.
    RenameGraphDotFile(dotGraphFileName,
                       options.GetRequestInputsAndOutputsDumpDir(),
                       netId);

    std::unique_ptr<ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>> preparedModel(
            new ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>(
                    netId,
                    runtime.get(),
                    model,
                    options.GetRequestInputsAndOutputsDumpDir(),
                    options.IsGpuProfilingEnabled(),
                    priority));

    // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned,
    // if tuning is enabled) before the first 'real' inference, removing that overhead from the first real call.
    if (!preparedModel->ExecuteWithDummyInputs())
    {
        return FailPrepareModel(V1_3::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
    }

    if (clTunedParameters &&
        options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
    {
        // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
        try
        {
            clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
        }
        catch (std::exception& error)
        {
            ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
                  options.GetClTunedParametersFile().c_str(), error.what());
        }
    }

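    // preparedModel.release() hands ownership of the raw pointer to the strong pointer
    // (sp<>) constructed for the callback.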
    NotifyCallbackAndCheck(cb, V1_3::ErrorStatus::NONE, preparedModel.release());

    return V1_3::ErrorStatus::NONE;
}

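// Reports the driver's performance capabilities. When the runtime is available each
// figure is read from a system property (falling back to defaultValue); otherwise zeroed
// capabilities are returned with DEVICE_UNAVAILABLE.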
Return<void> ArmnnDriverImpl::getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
                                                  V1_3::IDevice::getCapabilities_1_3_cb cb)
{
    ALOGV("hal_1_3::ArmnnDriverImpl::getCapabilities()");

    V1_3::Capabilities capabilities;

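    // Fallback figure reported when a performance system property is unset.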
    float defaultValue = 0.1f;

    if (runtime)
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);

        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
                ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);

        capabilities.ifPerformance.execTime =
                ParseSystemProperty(g_ifPerformanceExecTime, defaultValue);

        capabilities.ifPerformance.powerUsage =
                ParseSystemProperty(g_ifPerformancePowerUsage, defaultValue);

        capabilities.whilePerformance.execTime =
                ParseSystemProperty(g_whilePerformanceExecTime, defaultValue);

        capabilities.whilePerformance.powerUsage =
                ParseSystemProperty(g_whilePerformancePowerUsage, defaultValue);

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({FLT_MAX, FLT_MAX});
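        // Operand types that are not explicitly updated below therefore report the worst
        // possible performance.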

        // Load supported operand types
        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT16,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT16,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformanceExecTime,
                                                    defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmSignedPerformancePowerUsage,
                                                      defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT16_SYMM,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
                {
                    .execTime =
                        ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
                    .powerUsage =
                        ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_INT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
                });

        update(&capabilities.operandPerformance, V1_3::OperandType::INT32,
                {
                    .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
                    .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
                });

        cb(V1_3::ErrorStatus::NONE, capabilities);
    }
    else
    {
        capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime   = 0;
        capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime   = 0;
        capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
        capabilities.ifPerformance.execTime      = 0;
        capabilities.ifPerformance.powerUsage    = 0;
        capabilities.whilePerformance.execTime   = 0;
        capabilities.whilePerformance.powerUsage = 0;

        // Set the base value for all operand types
        capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({0.0f, 0.0f});

        cb(V1_3::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
    }

    return Void();
}

} // namespace hal_1_3
} // namespace armnn_driver