• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ArmnnDriverImpl.hpp"
7 #include "../ArmnnPreparedModel_1_2.hpp"
8 #include "../ModelToINetworkConverter.hpp"
9 #include "../SystemPropertiesUtils.hpp"
10 
11 #include <log/log.h>
12 
13 namespace
14 {
15 
// Names of the system properties that getCapabilities_1_2() passes to ParseSystemProperty()
// to obtain the reported execTime / powerUsage figures.
//
// Both the pointer and the pointee are const: these are fixed lookup keys and must never be
// re-pointed at runtime.
//
// NOTE: the relaxed-precision keys use the "ArmNN." prefix whereas the operand-type keys use
// "Armnn.". The spelling is part of the runtime key and is deliberately left untouched here.
const char* const g_RelaxedFloat32toFloat16PerformanceExecTime    = "ArmNN.relaxedFloat32toFloat16Performance.execTime";
const char* const g_RelaxedFloat32toFloat16PerformancePowerUsage  = "ArmNN.relaxedFloat32toFloat16Performance.powerUsage";

const char* const g_OperandTypeTensorFloat32PerformanceExecTime   = "Armnn.operandTypeTensorFloat32Performance.execTime";
const char* const g_OperandTypeTensorFloat32PerformancePowerUsage = "Armnn.operandTypeTensorFloat32Performance.powerUsage";

const char* const g_OperandTypeFloat32PerformanceExecTime         = "Armnn.operandTypeFloat32Performance.execTime";
const char* const g_OperandTypeFloat32PerformancePowerUsage       = "Armnn.operandTypeFloat32Performance.powerUsage";

const char* const g_OperandTypeTensorFloat16PerformanceExecTime   = "Armnn.operandTypeTensorFloat16Performance.execTime";
const char* const g_OperandTypeTensorFloat16PerformancePowerUsage = "Armnn.operandTypeTensorFloat16Performance.powerUsage";

const char* const g_OperandTypeFloat16PerformanceExecTime         = "Armnn.operandTypeFloat16Performance.execTime";
const char* const g_OperandTypeFloat16PerformancePowerUsage       = "Armnn.operandTypeFloat16Performance.powerUsage";

const char* const g_OperandTypeTensorQuant8AsymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.execTime";
const char* const g_OperandTypeTensorQuant8AsymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8AsymmPerformance.powerUsage";

const char* const g_OperandTypeTensorQuant16SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant16SymmPerformance.execTime";
const char* const g_OperandTypeTensorQuant16SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant16SymmPerformance.powerUsage";

const char* const g_OperandTypeTensorQuant8SymmPerformanceExecTime =
        "Armnn.operandTypeTensorQuant8SymmPerformance.execTime";
const char* const g_OperandTypeTensorQuant8SymmPerformancePowerUsage =
        "Armnn.operandTypeTensorQuant8SymmPerformance.powerUsage";

const char* const g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.execTime";
const char* const g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage =
    "Armnn.operandTypeTensorQuant8SymmPerChannelPerformance.powerUsage";


const char* const g_OperandTypeTensorInt32PerformanceExecTime     = "Armnn.operandTypeTensorInt32Performance.execTime";
const char* const g_OperandTypeTensorInt32PerformancePowerUsage   = "Armnn.operandTypeTensorInt32Performance.powerUsage";

const char* const g_OperandTypeInt32PerformanceExecTime           = "Armnn.operandTypeInt32Performance.execTime";
const char* const g_OperandTypeInt32PerformancePowerUsage         = "Armnn.operandTypeInt32Performance.powerUsage";
57 
58 
NotifyCallbackAndCheck(const sp<V1_2::IPreparedModelCallback> & callback,V1_0::ErrorStatus errorStatus,const sp<V1_2::IPreparedModel> & preparedModelPtr)59 void NotifyCallbackAndCheck(const sp<V1_2::IPreparedModelCallback>& callback,
60                             V1_0::ErrorStatus errorStatus,
61                             const sp<V1_2::IPreparedModel>& preparedModelPtr)
62 {
63     Return<void> returned = callback->notify_1_2(errorStatus, preparedModelPtr);
64     // This check is required, if the callback fails and it isn't checked it will bring down the service
65     if (!returned.isOk())
66     {
67         ALOGE("ArmnnDriverImpl::prepareModel: hidl callback failed to return properly: %s ",
68               returned.description().c_str());
69     }
70 }
71 
FailPrepareModel(V1_0::ErrorStatus error,const std::string & message,const sp<V1_2::IPreparedModelCallback> & callback)72 Return<V1_0::ErrorStatus> FailPrepareModel(V1_0::ErrorStatus error,
73                                            const std::string& message,
74                                            const sp<V1_2::IPreparedModelCallback>& callback)
75 {
76     ALOGW("ArmnnDriverImpl::prepareModel: %s", message.c_str());
77     NotifyCallbackAndCheck(callback, error, nullptr);
78     return error;
79 }
80 
81 } // anonymous namespace
82 
83 namespace armnn_driver
84 {
85 namespace hal_1_2
86 {
87 
prepareArmnnModel_1_2(const armnn::IRuntimePtr & runtime,const armnn::IGpuAccTunedParametersPtr & clTunedParameters,const DriverOptions & options,const V1_2::Model & model,const sp<V1_2::IPreparedModelCallback> & cb,bool float32ToFloat16)88 Return<V1_0::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_2(
89        const armnn::IRuntimePtr& runtime,
90        const armnn::IGpuAccTunedParametersPtr& clTunedParameters,
91        const DriverOptions& options,
92        const V1_2::Model& model,
93        const sp<V1_2::IPreparedModelCallback>& cb,
94        bool float32ToFloat16)
95 {
96     ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_2()");
97 
98     if (cb.get() == nullptr)
99     {
100         ALOGW("ArmnnDriverImpl::prepareModel: Invalid callback passed to prepareModel");
101         return V1_0::ErrorStatus::INVALID_ARGUMENT;
102     }
103 
104     if (!runtime)
105     {
106         return FailPrepareModel(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, "Device unavailable", cb);
107     }
108 
109     if (!android::nn::validateModel(model))
110     {
111         return FailPrepareModel(V1_0::ErrorStatus::INVALID_ARGUMENT, "Invalid model passed as input", cb);
112     }
113 
114     // Deliberately ignore any unsupported operations requested by the options -
115     // at this point we're being asked to prepare a model that we've already declared support for
116     // and the operation indices may be different to those in getSupportedOperations anyway.
117     std::set<unsigned int> unsupportedOperations;
118     ModelToINetworkConverter<HalPolicy> modelConverter(options.GetBackends(),
119                                                        model,
120                                                        unsupportedOperations);
121 
122     if (modelConverter.GetConversionResult() != ConversionResult::Success)
123     {
124         FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "ModelToINetworkConverter failed", cb);
125         return V1_0::ErrorStatus::NONE;
126     }
127 
128     // Optimize the network
129     armnn::IOptimizedNetworkPtr optNet(nullptr, nullptr);
130     armnn::OptimizerOptions OptOptions;
131     OptOptions.m_ReduceFp32ToFp16 = float32ToFloat16;
132 
133     armnn::BackendOptions gpuAcc("GpuAcc",
134     {
135         { "FastMathEnabled", options.IsFastMathEnabled() }
136     });
137     armnn::BackendOptions cpuAcc("CpuAcc",
138     {
139         { "FastMathEnabled", options.IsFastMathEnabled() }
140     });
141     OptOptions.m_ModelOptions.push_back(gpuAcc);
142     OptOptions.m_ModelOptions.push_back(cpuAcc);
143 
144     std::vector<std::string> errMessages;
145     try
146     {
147         optNet = armnn::Optimize(*modelConverter.GetINetwork(),
148                                  options.GetBackends(),
149                                  runtime->GetDeviceSpec(),
150                                  OptOptions,
151                                  errMessages);
152     }
153     catch (std::exception &e)
154     {
155         std::stringstream message;
156         message << "Exception (" << e.what() << ") caught from optimize.";
157         FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
158         return V1_0::ErrorStatus::NONE;
159     }
160 
161     // Check that the optimized network is valid.
162     if (!optNet)
163     {
164         std::stringstream message;
165         message << "Invalid optimized network";
166         for (const std::string& msg : errMessages)
167         {
168             message << "\n" << msg;
169         }
170         FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
171         return V1_0::ErrorStatus::NONE;
172     }
173 
174     // Export the optimized network graph to a dot file if an output dump directory
175     // has been specified in the drivers' arguments.
176     std::string dotGraphFileName = ExportNetworkGraphToDotFile(*optNet,
177                                                                options.GetRequestInputsAndOutputsDumpDir());
178 
179     // Load it into the runtime.
180     armnn::NetworkId netId = 0;
181     try
182     {
183         if (runtime->LoadNetwork(netId, move(optNet)) != armnn::Status::Success)
184         {
185             return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be loaded", cb);
186         }
187     }
188     catch (std::exception& e)
189     {
190         std::stringstream message;
191         message << "Exception (" << e.what()<< ") caught from LoadNetwork.";
192         FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, message.str(), cb);
193         return V1_0::ErrorStatus::NONE;
194     }
195 
196     // Now that we have a networkId for the graph rename the dump file to use it
197     // so that we can associate the graph file and the input/output tensor dump files
198     RenameGraphDotFile(dotGraphFileName,
199                        options.GetRequestInputsAndOutputsDumpDir(),
200                        netId);
201 
202     std::unique_ptr<ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>> preparedModel(
203             new ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>(
204                     netId,
205                     runtime.get(),
206                     model,
207                     options.GetRequestInputsAndOutputsDumpDir(),
208                     options.IsGpuProfilingEnabled()));
209 
210     // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
211     // this is enabled) before the first 'real' inference which removes the overhead of the first inference.
212     if (!preparedModel->ExecuteWithDummyInputs())
213     {
214         return FailPrepareModel(V1_0::ErrorStatus::GENERAL_FAILURE, "Network could not be executed", cb);
215     }
216 
217     if (clTunedParameters &&
218         options.GetClTunedParametersMode() == armnn::IGpuAccTunedParameters::Mode::UpdateTunedParameters)
219     {
220         // Now that we've done one inference the CL kernel parameters will have been tuned, so save the updated file.
221         try
222         {
223             clTunedParameters->Save(options.GetClTunedParametersFile().c_str());
224         }
225         catch (std::exception& error)
226         {
227             ALOGE("ArmnnDriverImpl::prepareModel: Failed to save CL tuned parameters file '%s': %s",
228                   options.GetClTunedParametersFile().c_str(), error.what());
229         }
230     }
231 
232     NotifyCallbackAndCheck(cb, V1_0::ErrorStatus::NONE, preparedModel.release());
233 
234     return V1_0::ErrorStatus::NONE;
235 }
236 
getCapabilities_1_2(const armnn::IRuntimePtr & runtime,V1_2::IDevice::getCapabilities_1_2_cb cb)237 Return<void> ArmnnDriverImpl::getCapabilities_1_2(const armnn::IRuntimePtr& runtime,
238                                                   V1_2::IDevice::getCapabilities_1_2_cb cb)
239 {
240     ALOGV("hal_1_2::ArmnnDriverImpl::getCapabilities()");
241 
242     V1_2::Capabilities capabilities;
243 
244     float defaultValue = .1f;
245 
246     if (runtime)
247     {
248         capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime =
249                 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
250 
251         capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage =
252                 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
253 
254         capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime =
255                 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformanceExecTime, defaultValue);
256 
257         capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage =
258                 ParseSystemProperty(g_RelaxedFloat32toFloat16PerformancePowerUsage, defaultValue);
259 
260         // Set the base value for all operand types
261         #ifdef ARMNN_ANDROID_R
262         capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({FLT_MAX, FLT_MAX});
263         #else
264         capabilities.operandPerformance = nonExtensionOperandPerformance({FLT_MAX, FLT_MAX});
265         #endif
266 
267         // Load supported operand types
268         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_FLOAT32,
269                 {
270                     .execTime = ParseSystemProperty(g_OperandTypeTensorFloat32PerformanceExecTime, defaultValue),
271                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat32PerformancePowerUsage, defaultValue)
272                 });
273 
274         update(&capabilities.operandPerformance, V1_2::OperandType::FLOAT32,
275                 {
276                     .execTime = ParseSystemProperty(g_OperandTypeFloat32PerformanceExecTime, defaultValue),
277                     .powerUsage = ParseSystemProperty(g_OperandTypeFloat32PerformancePowerUsage, defaultValue)
278                 });
279 
280         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_FLOAT16,
281                 {
282                     .execTime = ParseSystemProperty(g_OperandTypeTensorFloat16PerformanceExecTime, defaultValue),
283                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorFloat16PerformancePowerUsage, defaultValue)
284                 });
285 
286         update(&capabilities.operandPerformance, V1_2::OperandType::FLOAT16,
287                 {
288                     .execTime = ParseSystemProperty(g_OperandTypeFloat16PerformanceExecTime, defaultValue),
289                     .powerUsage = ParseSystemProperty(g_OperandTypeFloat16PerformancePowerUsage, defaultValue)
290                 });
291 
292         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_ASYMM,
293                 {
294                     .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformanceExecTime, defaultValue),
295                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8AsymmPerformancePowerUsage, defaultValue)
296                 });
297 
298         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_SYMM,
299                 {
300                     .execTime = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformanceExecTime, defaultValue),
301                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerformancePowerUsage, defaultValue)
302                 });
303 
304         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT16_SYMM,
305                 {
306                     .execTime = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformanceExecTime, defaultValue),
307                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorQuant16SymmPerformancePowerUsage, defaultValue)
308                 });
309 
310         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL,
311                {
312                    .execTime =
313                    ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformanceExecTime, defaultValue),
314                    .powerUsage =
315                    ParseSystemProperty(g_OperandTypeTensorQuant8SymmPerChannelPerformancePowerUsage, defaultValue)
316                });
317 
318         update(&capabilities.operandPerformance, V1_2::OperandType::TENSOR_INT32,
319                 {
320                     .execTime = ParseSystemProperty(g_OperandTypeTensorInt32PerformanceExecTime, defaultValue),
321                     .powerUsage = ParseSystemProperty(g_OperandTypeTensorInt32PerformancePowerUsage, defaultValue)
322                 });
323 
324         update(&capabilities.operandPerformance, V1_2::OperandType::INT32,
325                 {
326                     .execTime = ParseSystemProperty(g_OperandTypeInt32PerformanceExecTime, defaultValue),
327                     .powerUsage = ParseSystemProperty(g_OperandTypeInt32PerformancePowerUsage, defaultValue)
328                 });
329 
330         cb(V1_0::ErrorStatus::NONE, capabilities);
331     }
332     else
333     {
334         capabilities.relaxedFloat32toFloat16PerformanceScalar.execTime   = 0;
335         capabilities.relaxedFloat32toFloat16PerformanceScalar.powerUsage = 0;
336         capabilities.relaxedFloat32toFloat16PerformanceTensor.execTime   = 0;
337         capabilities.relaxedFloat32toFloat16PerformanceTensor.powerUsage = 0;
338 
339         // Set the base value for all operand types
340         #ifdef ARMNN_ANDROID_R
341         capabilities.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_2>({0.f, 0.0f});
342         #else
343         capabilities.operandPerformance = nonExtensionOperandPerformance({0.f, 0.0f});
344         #endif
345 
346         cb(V1_0::ErrorStatus::DEVICE_UNAVAILABLE, capabilities);
347     }
348 
349     return Void();
350 }
351 
352 } // namespace hal_1_2
353 } // namespace armnn_driver
354