1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Manager"
18
19 #include "Manager.h"
20
21 #include <CpuExecutor.h>
22 #include <LegacyUtils.h>
23 #include <MetaModel.h>
24 #include <Tracing.h>
25 #include <android-base/properties.h>
26 #include <nnapi/IBurst.h>
27 #include <nnapi/IDevice.h>
28 #include <nnapi/IExecution.h>
29 #include <nnapi/IPreparedModel.h>
30 #include <nnapi/SharedMemory.h>
31 #include <nnapi/TypeUtils.h>
32 #include <nnapi/Types.h>
33 #include <nnapi/Validation.h>
34
35 #include <algorithm>
36 #include <functional>
37 #include <iterator>
38 #include <map>
39 #include <memory>
40 #include <regex>
41 #include <set>
42 #include <string>
43 #include <tuple>
44 #include <utility>
45 #include <vector>
46
47 #include "ExecutionCallback.h"
48 #include "Memory.h"
49 #include "ModelArgumentInfo.h"
50 #include "ServerFlag.h"
51 #include "TypeManager.h"
52
53 #ifndef NN_COMPATIBILITY_LIBRARY_BUILD
54 #include <build/version.h>
55 #include <cutils/native_handle.h>
56 #include <nnapi/hal/1.3/Buffer.h>
57 #include <nnapi/hal/Service.h>
58 #endif // NN_COMPATIBILITY_LIBRARY_BUILD
59
60 #ifdef NN_EXPERIMENTAL_FEATURE
61 #include "NeuralNetworksExperimentalFeatures.h"
62 #endif // NN_EXPERIMENTAL_FEATURE
63
64 namespace android {
65 namespace nn {
66 namespace {
67
68 Version getRuntimeFeatureLevelVersionHelper() {
69 #if defined(NN_EXPERIMENTAL_FEATURE) && defined(NN_COMPATIBILITY_LIBRARY_BUILD)
70 #error "NN_EXPERIMENTAL_FEATURE is not supported when NN_COMPATIBILITY_LIBRARY_BUILD is defined"
71 #elif defined(NN_EXPERIMENTAL_FEATURE)
72 auto version = kVersionFeatureLevelExperimental;
73 // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
74 // features that are only available in the runtime.
75 version.runtimeOnlyFeatures = true;
76 #elif defined(NN_COMPATIBILITY_LIBRARY_BUILD)
77 auto version = serverFeatureLevelToVersion(kMaxFeatureLevelNum);
78 #else // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
79 auto version = serverFeatureLevelToVersion(getServerFeatureLevelFlag());
80 // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
81 // features that are only available in the runtime.
82 version.runtimeOnlyFeatures = true;
83 #endif // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
84 return version;
85 }
86
87 Version getRuntimeFeatureLevelVersion() {
88 static const Version version = getRuntimeFeatureLevelVersionHelper();
89 return version;
90 }
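// Note: the helper result is cached in a function-local static, so the runtime feature level
// (including any server-side flag value) is read once per process and then reused.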
91
92 bool getWhetherPlatformTelemetryIsEnabled() {
93 #if !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
94 return getServerTelemetryEnableFlag();
95 #else // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
96 return false;
97 #endif // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
98 }
99
100 } // namespace
101
102 // A Device with an actual underlying driver.
103 class DriverDevice : public Device {
104 public:
105 // Create a DriverDevice from a SharedDevice handle to the underlying driver.
106 // Returns nullptr on failure.
107 static std::shared_ptr<DriverDevice> create(SharedDevice device);
108
109 // Prefer using DriverDevice::create
110 explicit DriverDevice(SharedDevice device);
111
112 const std::string& getName() const override { return kInterface->getName(); }
113 const std::string& getVersionString() const override { return kInterface->getVersionString(); }
114 Version getFeatureLevel() const override { return kInterface->getFeatureLevel(); }
115 int32_t getType() const override { return static_cast<int32_t>(kInterface->getType()); }
116 const std::vector<Extension>& getSupportedExtensions() const override {
117 return kInterface->getSupportedExtensions();
118 }
119 std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
120 const Capabilities& getCapabilities() const override { return kInterface->getCapabilities(); }
121 Capabilities::PerformanceInfo getPerformance(OperandType type) const override {
122 return getCapabilities().operandPerformance.lookup(type);
123 }
124 Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
125 return getCapabilities().relaxedFloat32toFloat16PerformanceScalar;
126 }
127 Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
128 return getCapabilities().relaxedFloat32toFloat16PerformanceTensor;
129 }
130 Capabilities::PerformanceInfo getIfPerformance() const override {
131 return getCapabilities().ifPerformance;
132 }
133 Capabilities::PerformanceInfo getWhilePerformance() const override {
134 return getCapabilities().whilePerformance;
135 }
136 std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
137 return kInterface->getNumberOfCacheFilesNeeded();
138 }
139 bool isCachingSupported() const override {
140 // Caching is supported if either numModelCache or numDataCache is greater than 0.
141 const auto [numModelCacheFiles, numDataCacheFiles] = getNumberOfCacheFilesNeeded();
142 return numModelCacheFiles > 0 || numDataCacheFiles > 0;
143 }
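// For example, a driver whose getNumberOfCacheFilesNeeded() returns {1, 0} uses a single
// model cache file and no data cache files, and is therefore treated as supporting caching.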
144 int wait() const override {
145 auto result = kInterface->wait();
146 if (!result.ok()) {
147 LOG(ERROR) << "DriverDevice::wait error: " << result.error().message;
148 return convertErrorStatusToResultCode(result.error().code);
149 }
150 return ANEURALNETWORKS_NO_ERROR;
151 }
152
153 std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
154 const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
155 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
156 const std::optional<CacheToken>& maybeToken,
157 const std::vector<TokenValuePair>& metaData,
158 const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;
159
160 std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
161 OperandType) const override;
162
163 private:
164 const SharedDevice kInterface;
165
166 GeneralResult<std::vector<bool>> getSupportedOperationsImpl(const MetaModel& metaModel) const;
167 GeneralResult<SharedPreparedModel> prepareModelFromCacheInternal(
168 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
169 const CacheToken& token) const;
170
171 #ifdef NN_DEBUGGABLE
172 // For debugging: behavior of IDevice::getSupportedOperations for SampleDriver.
173 // 0 - all operations reported by IDevice::getSupportedOperations() supported
174 // 1 - some operations reported by IDevice::getSupportedOperations() supported
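// (Illustrative) The value is read from the "debug.nn.sample.supported" system property in
// the DriverDevice constructor below, e.g. "adb shell setprop debug.nn.sample.supported 1".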
175 uint32_t mSupported = 0;
176 #endif // NN_DEBUGGABLE
177 };
178
179 // A RuntimePreparedModel with an underlying IPreparedModel instance returned by the actual driver.
180 class DriverPreparedModel : public RuntimePreparedModel {
181 public:
182 DriverPreparedModel(const Device* device, const SharedPreparedModel& preparedModel)
183 : mDevice(device), mPreparedModel(preparedModel) {
184 CHECK(mDevice != nullptr);
185 CHECK(mPreparedModel != nullptr);
186 }
187
188 const Device* getDevice() const override { return mDevice; }
189 SharedPreparedModel getInterface() const override { return mPreparedModel; }
190
191 std::tuple<int, std::vector<OutputShape>, Timing> execute(
192 const std::vector<ModelArgumentInfo>& inputs,
193 const std::vector<ModelArgumentInfo>& outputs,
194 const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
195 MeasureTiming measure, const OptionalTimePoint& deadline,
196 const OptionalDuration& loopTimeoutDuration,
197 const std::vector<TokenValuePair>& metaData) const override;
198
199 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
200 const std::vector<ModelArgumentInfo>& inputs,
201 const std::vector<ModelArgumentInfo>& outputs,
202 const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
203 MeasureTiming measure, const OptionalTimePoint& deadline,
204 const OptionalDuration& loopTimeoutDuration,
205 const OptionalDuration& timeoutDurationAfterFence,
206 const std::vector<TokenValuePair>& metaData) const override;
207
208 std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
209 const std::vector<ModelArgumentInfo>& inputs,
210 const std::vector<ModelArgumentInfo>& outputs,
211 const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
212 const OptionalDuration& loopTimeoutDuration,
213 const std::vector<TokenValuePair>& metaData) const override;
214
215 GeneralResult<SharedBurst> configureExecutionBurst() const override {
216 return mPreparedModel->configureExecutionBurst();
217 }
218
219 MemoryPreference getMemoryPreference() const override {
220 if (isCompliantVersion(kVersionFeatureLevel5, mDevice->getFeatureLevel())) {
221 return {kDefaultRequestMemoryAlignment, kDefaultRequestMemoryPadding};
222 } else {
223 // We are not able to pass memory padding information to HIDL drivers, so return the
224 // minimum padding.
225 return {kDefaultRequestMemoryAlignment, kMinMemoryPadding};
226 }
227 }
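// (Note, not enforced in this file) These alignment/padding values are hints consumed by the
// runtime when laying out pointer-specified arguments in request memory pools; HIDL drivers
// (feature level < 5) never receive padding information, hence the kMinMemoryPadding fallback.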
228
229 private:
230 const Device* mDevice;
231 const SharedPreparedModel mPreparedModel;
232 };
233
234 class DriverExecution : public RuntimeExecution {
235 public:
236 DriverExecution(SharedExecution execution, Request request,
237 std::vector<const RuntimeMemory*> memories, MeasureTiming measure,
238 OptionalDuration loopTimeoutDuration, Version deviceFeatureLevel,
239 const std::vector<TokenValuePair>& metaData)
240 : kExecution(std::move(execution)),
241 kRequest(std::move(request)),
242 kMemories(std::move(memories)),
243 kMeasure(measure),
244 kLoopTimeoutDuration(std::move(loopTimeoutDuration)),
245 kDeviceFeatureLevel(deviceFeatureLevel),
246 kMetaData(metaData) {
247 CHECK(kExecution != nullptr);
248 }
249
250 std::tuple<int, std::vector<OutputShape>, Timing> compute(
251 const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;
252
253 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
254 const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
255 const OptionalDuration& timeoutDurationAfterFence) const override;
256
257 private:
258 const SharedExecution kExecution;
259
260 // For burst execution.
261 const Request kRequest;
262 const std::vector<const RuntimeMemory*> kMemories;
263 const MeasureTiming kMeasure;
264 const OptionalDuration kLoopTimeoutDuration;
265 mutable std::map<const IBurst*, SharedExecution> mCachedBurstExecutions;
266
267 // For fenced execution.
268 const Version kDeviceFeatureLevel;
269
270 // Execution metadata.
271 std::vector<TokenValuePair> kMetaData;
272 };
273
274 DriverDevice::DriverDevice(SharedDevice device) : kInterface(std::move(device)) {
275 CHECK(kInterface != nullptr);
276 #ifdef NN_DEBUGGABLE
277 static const char samplePrefix[] = "sample";
278 if (getName().substr(0, sizeof(samplePrefix) - 1) == samplePrefix) {
279 mSupported = getProp("debug.nn.sample.supported");
280 }
281 #endif // NN_DEBUGGABLE
282 }
283
284 std::shared_ptr<DriverDevice> DriverDevice::create(SharedDevice device) {
285 if (device == nullptr) {
286 LOG(ERROR) << "DriverDevice::create called with nullptr";
287 return nullptr;
288 }
289
290 return std::make_shared<DriverDevice>(std::move(device));
291 }
292
293 int64_t DeviceManager::versionToFeatureLevel(Version::Level versionLevel) {
294 switch (versionLevel) {
295 case Version::Level::FEATURE_LEVEL_1:
296 return ANEURALNETWORKS_FEATURE_LEVEL_1;
297 case Version::Level::FEATURE_LEVEL_2:
298 return ANEURALNETWORKS_FEATURE_LEVEL_2;
299 case Version::Level::FEATURE_LEVEL_3:
300 return ANEURALNETWORKS_FEATURE_LEVEL_3;
301 case Version::Level::FEATURE_LEVEL_4:
302 return ANEURALNETWORKS_FEATURE_LEVEL_4;
303 case Version::Level::FEATURE_LEVEL_5:
304 return ANEURALNETWORKS_FEATURE_LEVEL_5;
305 case Version::Level::FEATURE_LEVEL_6:
306 return ANEURALNETWORKS_FEATURE_LEVEL_6;
307 case Version::Level::FEATURE_LEVEL_7:
308 return ANEURALNETWORKS_FEATURE_LEVEL_7;
309 case Version::Level::FEATURE_LEVEL_8:
310 return ANEURALNETWORKS_FEATURE_LEVEL_8;
311 #ifdef NN_EXPERIMENTAL_FEATURE
312 case Version::Level::FEATURE_LEVEL_EXPERIMENTAL:
313 return ANEURALNETWORKS_FEATURE_LEVEL_EXPERIMENTAL;
314 #endif // NN_EXPERIMENTAL_FEATURE
315 }
316 LOG(FATAL) << "Unrecognized version " << versionLevel;
317 return -1;
318 }
319
320 GeneralResult<std::vector<bool>> DriverDevice::getSupportedOperationsImpl(
321 const MetaModel& metaModel) const {
322 const auto featureLevel = kInterface->getFeatureLevel();
323 const auto slice = metaModel.getSlice(featureLevel);
324 if (!slice.has_value()) {
325 return NN_ERROR() << "getSlice(" << featureLevel << ") failed";
326 }
327
328 const auto& [sliceModel, slicedModelOperationIndexToModelOperationIndex] = *slice;
329 const std::vector<bool> supported = NN_TRY(kInterface->getSupportedOperations(sliceModel));
330 const uint32_t slicedOperationCount = sliceModel.main.operations.size();
331 if (supported.size() != slicedOperationCount) {
332 return NN_ERROR() << "IDevice::getSupportedOperations returned a vector of length "
333 << supported.size() << " when expecting " << slicedOperationCount;
334 }
335
336 const Model& model = metaModel.getModel();
337 const uint32_t operationCount = model.main.operations.size();
338 std::vector<bool> remappedSupported(operationCount, false);
339 for (size_t i = 0; i < supported.size(); ++i) {
340 if (supported[i]) {
341 remappedSupported[slicedModelOperationIndexToModelOperationIndex(i)] = true;
342 }
343 }
344 return remappedSupported;
345 }
346
347 std::vector<bool> DriverDevice::getSupportedOperations(const MetaModel& metaModel) const {
348 const Model& model = metaModel.getModel();
349
350 auto result = getSupportedOperationsImpl(metaModel);
351 if (!result.ok()) {
352 LOG(ERROR) << "getSupportedOperations failed with code " << result.error().code << ": "
353 << result.error().message;
354 // Set the supported operation vectors to all false, so we won't use this driver.
355 return std::vector<bool>(model.main.operations.size(), false);
356 }
357
358 std::vector<bool>& supportedOperations = result.value();
359 #ifdef NN_DEBUGGABLE
360 if (mSupported != 1) {
361 return supportedOperations;
362 }
363
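// Debug-only pruning: hash the device name together with each operation's type and its
// operands' types, dimensions, and lifetimes, and clear the "supported" bit whenever the hash
// is odd. This pseudo-randomly drops roughly half of the supported operations so that model
// partitioning across devices gets exercised.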
364 const uint32_t baseAccumulator = std::hash<std::string>{}(getName());
365 for (size_t operationIndex = 0; operationIndex < supportedOperations.size(); operationIndex++) {
366 if (!supportedOperations[operationIndex]) {
367 continue;
368 }
369
370 uint32_t accumulator = baseAccumulator;
371 const Operation& operation = model.main.operations[operationIndex];
372 accumulator ^= static_cast<uint32_t>(operation.type);
373 auto accumulateOperands = [&model, &accumulator](const std::vector<uint32_t>& operands) {
374 for (uint32_t operandIndex : operands) {
375 const Operand& operand = model.main.operands[operandIndex];
376 accumulator ^= static_cast<uint32_t>(operand.type);
377 accumulator ^= operand.dimensions.size();
378 for (const Dimension& dimension : operand.dimensions) {
379 accumulator ^= dimension;
380 if (operand.lifetime == Operand::LifeTime::CONSTANT_COPY ||
381 operand.lifetime == Operand::LifeTime::CONSTANT_REFERENCE ||
382 operand.lifetime == Operand::LifeTime::POINTER) {
383 accumulator ^= 1;
384 }
385 }
386 }
387 };
388 accumulateOperands(operation.inputs);
389 accumulateOperands(operation.outputs);
390 if (accumulator & 1) {
391 supportedOperations[operationIndex] = false;
392 }
393 }
394 #endif // NN_DEBUGGABLE
395
396 return supportedOperations;
397 }
398
399 // Opens a cache file for reading and writing and returns a shared handle.
400 static GeneralResult<SharedHandle> createCacheHandle(const std::string& filename,
401 bool createIfNotExist) {
402 auto fd = base::unique_fd(open(filename.c_str(), createIfNotExist ? (O_RDWR | O_CREAT) : O_RDWR,
403 S_IRUSR | S_IWUSR));
404 if (!fd.ok()) {
405 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
406 << "Failed to " << (createIfNotExist ? "open or create" : "open") << " cache file "
407 << filename;
408 }
409 return std::make_shared<const Handle>(std::move(fd));
410 }
411
412 // Opens a list of cache files and returns a vector of shared handles. The files
413 // are always opened with both read and write permissions.
414 static GeneralResult<std::vector<SharedHandle>> createCacheHandleVec(
415 uint32_t numCacheFiles, const std::string& baseFilename, bool createIfNotExist) {
416 CHECK(numCacheFiles <= kMaxNumberOfCacheFiles);
417 std::vector<SharedHandle> handles;
418 handles.reserve(numCacheFiles);
419 for (uint32_t i = 0; i < numCacheFiles; i++) {
420 std::string filename = baseFilename + std::to_string(i);
421 VLOG(COMPILATION) << "Cache " << i << ": " << filename;
422 handles.push_back(NN_TRY(createCacheHandle(filename, createIfNotExist)));
423 }
424 return handles;
425 }
426
427 // Maps a token to cache file names and returns a pair of vectors of shared
428 // handles to the opened files.
429 static GeneralResult<CacheHandles> getCacheHandles(
430 const CacheInfo& cacheInfo, const CacheToken& token,
431 const std::pair<uint32_t, uint32_t>& numCacheFiles, bool createIfNotExist) {
432 if (const auto* cacheHandles = std::get_if<CacheHandles>(&cacheInfo.variant)) {
433 if (cacheHandles->modelCache.size() != numCacheFiles.first) {
434 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
435 << "Expected " << numCacheFiles.first << " model cache handles, got "
436 << cacheHandles->modelCache.size();
437 }
438 if (cacheHandles->dataCache.size() != numCacheFiles.second) {
439 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
440 << "Expected " << numCacheFiles.second << " data cache handles, got "
441 << cacheHandles->dataCache.size();
442 }
443 return *cacheHandles;
444 }
445
446 // The filename includes kByteSizeOfCacheToken * 2 characters for token,
447 // and 1 character for model/data cache identifier.
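// For example (illustrative), a token byte of 0x5A is encoded as "KF": 'A' + the low nibble
// (0x0A -> 'K') followed by 'A' + the high nibble (0x05 -> 'F'). The final character is later
// overwritten with '1' for model cache files or '2' for data cache files, and
// createCacheHandleVec appends the file index.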
448 std::string filename(kByteSizeOfCacheToken * 2 + 1, '0');
449 for (uint32_t i = 0; i < kByteSizeOfCacheToken; i++) {
450 filename[i * 2] = 'A' + (token[i] & 0x0F);
451 filename[i * 2 + 1] = 'A' + (token[i] >> 4);
452 }
453
454 const auto& cacheDir = std::get<CacheDir>(cacheInfo.variant);
455 CHECK(cacheDir.empty() || cacheDir.back() == '/');
456 std::string cacheFileName = cacheDir + filename;
457 const uint32_t cacheTypeIdentifierIndex = cacheDir.size() + kByteSizeOfCacheToken * 2;
458
459 cacheFileName[cacheTypeIdentifierIndex] = '1';
460 std::vector<SharedHandle> modelCache =
461 NN_TRY(createCacheHandleVec(numCacheFiles.first, cacheFileName, createIfNotExist));
462
463 cacheFileName[cacheTypeIdentifierIndex] = '2';
464 std::vector<SharedHandle> dataCache =
465 NN_TRY(createCacheHandleVec(numCacheFiles.second, cacheFileName, createIfNotExist));
466
467 return CacheHandles{
468 .modelCache = std::move(modelCache),
469 .dataCache = std::move(dataCache),
470 };
471 }
472
473 GeneralResult<SharedPreparedModel> DriverDevice::prepareModelFromCacheInternal(
474 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
475 const CacheToken& token) const {
476 // Get cache files if they exist, otherwise return from the function early.
477 auto cache = NN_TRY(getCacheHandles(cacheInfo, token, kInterface->getNumberOfCacheFilesNeeded(),
478 /*createIfNotExist=*/false));
479 return kInterface->prepareModelFromCache(deadline, cache.modelCache, cache.dataCache, token);
480 }
481
482 std::pair<int, std::shared_ptr<RuntimePreparedModel>> DriverDevice::prepareModel(
483 const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
484 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
485 const std::optional<CacheToken>& maybeToken, const std::vector<TokenValuePair>& metaData,
486 const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const {
487 // Attempt to compile from cache if token is present.
488 if (maybeToken.has_value()) {
489 auto result = prepareModelFromCacheInternal(deadline, cacheInfo, *maybeToken);
490 if (result.has_value()) {
491 LOG(INFO) << "prepareModelFromCache: successfully prepared model from cache";
492 return {ANEURALNETWORKS_NO_ERROR,
493 std::make_shared<DriverPreparedModel>(this, std::move(result).value())};
494 } else {
495 LOG(ERROR) << "prepareModelFromCache failure (" << result.error().code
496 << "): " << result.error().message;
497 }
498 }
499
500 // Get cache files if they exist, otherwise create them.
501 CacheHandles cache;
502 if (maybeToken.has_value()) {
503 auto result =
504 getCacheHandles(cacheInfo, *maybeToken, kInterface->getNumberOfCacheFilesNeeded(),
505 /*createIfNotExist=*/true);
506 if (result.has_value()) {
507 cache = std::move(result).value();
508 } else {
509 LOG(ERROR) << "getCacheHandles failure (" << result.error().code
510 << "): " << result.error().message;
511 }
512 }
513
514 // Get the token if it exists, otherwise get a null token.
515 static constexpr CacheToken kNullToken = {};
516 const CacheToken token = maybeToken.value_or(kNullToken);
517
518 // Fall back to full compilation (possibly with a token) if
519 // prepareModelFromCache could not be used or failed.
520 const Model model = makeModel();
521 auto result =
522 kInterface->prepareModel(model, preference, priority, deadline, cache.modelCache,
523 cache.dataCache, token, metaData, extensionNameAndPrefix);
524 if (!result.ok()) {
525 LOG(ERROR) << "IDevice::prepareModel() error: " << result.error().message;
526 return {convertErrorStatusToResultCode(result.error().code), nullptr};
527 }
528 SharedPreparedModel preparedModel = std::move(result).value();
529 CHECK(preparedModel != nullptr)
530 << "IDevice::prepareModel() returned nullptr without error code";
531 return {ANEURALNETWORKS_NO_ERROR,
532 std::make_shared<DriverPreparedModel>(this, std::move(preparedModel))};
533 }
534
535 std::pair<int, std::unique_ptr<RuntimeMemory>> DriverDevice::allocate(const MemoryDescriptor& desc,
536 OperandType) const {
537 const BufferDesc bufferDesc = {.dimensions = desc.dimensions};
538 std::vector<SharedPreparedModel> preparedModels(desc.preparedModels.size());
539 std::transform(desc.preparedModels.begin(), desc.preparedModels.end(), preparedModels.begin(),
540 [](const auto* preparedModel) {
541 const auto versionedPreparedModel = preparedModel->getInterface();
542 CHECK(versionedPreparedModel != nullptr);
543 return versionedPreparedModel;
544 });
545 auto result =
546 kInterface->allocate(bufferDesc, preparedModels, desc.inputRoles, desc.outputRoles);
547 if (!result.ok()) {
548 LOG(ERROR) << "DriverDevice::allocate -- memory allocation on device " << getName()
549 << " failed!";
550 return {convertErrorStatusToResultCode(result.error().code), nullptr};
551 }
552 return MemoryFromDevice::create(std::move(result).value());
553 }
554
555 static Request createDriverRequest(const std::vector<ModelArgumentInfo>& inputs,
556 const std::vector<ModelArgumentInfo>& outputs,
557 const std::vector<const RuntimeMemory*>& memories) {
558 Request request;
559 request.inputs.reserve(inputs.size());
560 std::transform(inputs.begin(), inputs.end(), std::back_inserter(request.inputs),
561 [](const auto& input) { return input.createRequestArgument(); });
562 request.outputs.reserve(outputs.size());
563 std::transform(outputs.begin(), outputs.end(), std::back_inserter(request.outputs),
564 [](const auto& output) { return output.createRequestArgument(); });
565 request.pools.reserve(memories.size());
566 std::transform(memories.begin(), memories.end(), std::back_inserter(request.pools),
567 [](const RuntimeMemory* memory) { return memory->getMemoryPool(); });
568 return request;
569 }
570
571 // Perform computation on an actual device driver.
572 //
573 // Because HIDL cannot take raw pointers, two separate memory pools will be allocated for inputs and
574 // outputs specified by pointers. The input pointer data will be copied to the input pool prior to
575 // execution, and the output pointer data will be copied out from the output pool after the
576 // execution.
577 std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
578 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
579 const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
580 MeasureTiming measure, const OptionalTimePoint& deadline,
581 const OptionalDuration& loopTimeoutDuration,
582 const std::vector<TokenValuePair>& metaData) const {
583 NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");
584
585 auto request = createDriverRequest(inputs, outputs, memories);
586
587 NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::execute::execute");
588
589 ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> result;
590
591 // compute using burst if present, otherwise compute from IPreparedModel
592 const bool burstCompute = (burstController != nullptr);
593 if (burstCompute) {
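// Register each SharedMemory pool with the burst so the driver-side cache entry stays alive
// (via RuntimeMemory::hold) and repeated burst executions can reuse it.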
594 for (const RuntimeMemory* memory : memories) {
595 const auto pool = memory->getMemoryPool();
596 if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
597 auto cacheHold = burstController->cacheMemory(*maybeMemory);
598 memory->hold(cacheHold);
599 }
600 }
601
602 VLOG(EXECUTION) << "Before burstController->execute() " << SHOW_IF_DEBUG(request);
603 result = burstController->execute(request, measure, deadline, loopTimeoutDuration, metaData,
604 TypeManager::get()->getExtensionNameAndPrefix(metaData));
605 } else {
606 result = mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
607 TypeManager::get()->getExtensionNameAndPrefix(metaData));
608 }
609
610 int n = ANEURALNETWORKS_OP_FAILED;
611 std::vector<OutputShape> outputShapes;
612 Timing timing;
613
614 if (result.ok()) {
615 n = ANEURALNETWORKS_NO_ERROR;
616 std::tie(outputShapes, timing) = std::move(result).value();
617 } else {
618 auto [message, code, returnedOutputShapes] = std::move(result).error();
619 VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
620 LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
621 << "::execute(...) error: " << message;
622 n = convertErrorStatusToResultCode(code);
623 if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
624 outputShapes = std::move(returnedOutputShapes);
625 }
626 return {n, std::move(outputShapes), timing};
627 }
628
629 VLOG(EXECUTION) << "DriverPreparedModel::execute completed";
630 return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
631 }
632
633 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverPreparedModel::executeFenced(
634 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
635 const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
636 MeasureTiming measure, const OptionalTimePoint& deadline,
637 const OptionalDuration& loopTimeoutDuration,
638 const OptionalDuration& timeoutDurationAfterFence,
639 const std::vector<TokenValuePair>& metaData) const {
640 NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::executeFenced");
641 CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));
642
643 auto request = createDriverRequest(inputs, outputs, memories);
644
645 NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::executeFenced");
646
647 std::vector<SyncFence> waitForHandles;
648 waitForHandles.reserve(waitFor.size());
649 for (int fd : waitFor) {
650 int dupFd = dup(fd);
651 if (dupFd < 0) {
652 LOG(ERROR) << "Unable to dup the file descriptor";
653 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
654 }
655 waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
656 }
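// The incoming fds are owned by the caller, so dup() each one; the SyncFence then owns its
// own descriptor and the caller's fd stays valid.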
657
658 SyncFence syncFence = SyncFence::createAsSignaled();
659 ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
660 Timing timing = {};
661 if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, mDevice->getFeatureLevel())) {
662 auto result = mPreparedModel->executeFenced(
663 request, waitForHandles, measure, deadline, loopTimeoutDuration,
664 timeoutDurationAfterFence, metaData,
665 TypeManager::get()->getExtensionNameAndPrefix(metaData));
666 if (!result.ok()) {
667 LOG(ERROR) << "IPreparedModel::executeFenced() error: " << result.error().message;
668 VLOG(EXECUTION) << "**executeFenced failed**";
669 return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
670 }
671 std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
672 } else {
673 // Fall back to synchronous execution if executeFenced is not supported.
674 // First wait for all sync fences to be ready.
675 LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
676 for (const auto& fence : waitForHandles) {
677 if (!fence.hasFd() || fence.getFd() < 0) {
678 return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
679 }
680 auto r = fence.syncWait({/* no timeout */});
681 if (r != SyncFence::FenceState::SIGNALED) {
682 LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
683 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
684 }
685 }
686 auto result =
687 mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
688 TypeManager::get()->getExtensionNameAndPrefix(metaData));
689 if (!result.ok()) {
690 LOG(ERROR) << "IPreparedModel::execute() error: " << result.error().message;
691 return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
692 }
693 std::tie(std::ignore, timing) = result.value();
694 }
695
696 int syncFenceFd = -1;
697 if (syncFence.hasFd()) {
698 syncFenceFd = dup(syncFence.getFd());
699 if (syncFenceFd < 0) {
700 LOG(ERROR) << "Failed to dup the file descriptor";
701 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
702 }
703 }
704
705 VLOG(EXECUTION) << "DriverPreparedModel::executeFenced completed";
706 return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
707 }
708
709 std::pair<int, std::shared_ptr<RuntimeExecution>> DriverPreparedModel::createReusableExecution(
710 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
711 const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
712 const OptionalDuration& loopTimeoutDuration,
713 const std::vector<TokenValuePair>& metaData) const {
714 NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::createReusableExecution");
715
716 auto request = createDriverRequest(inputs, outputs, memories);
717 auto result = mPreparedModel->createReusableExecution(
718 request, measure, loopTimeoutDuration, metaData,
719 TypeManager::get()->getExtensionNameAndPrefix(metaData));
720 if (!result.ok()) {
721 LOG(ERROR) << "IPreparedModel::createReusableExecution() error: " << result.error().message;
722 const int n = convertErrorStatusToResultCode(result.error().code);
723 return {n, nullptr};
724 }
725 auto execution = std::make_shared<DriverExecution>(
726 std::move(result).value(), std::move(request), memories, measure, loopTimeoutDuration,
727 mDevice->getFeatureLevel(), metaData);
728 return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
729 }
730
731 std::tuple<int, std::vector<OutputShape>, Timing> DriverExecution::compute(
732 const SharedBurst& burstController, const OptionalTimePoint& deadline) const {
733 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::compute");
734
735 // compute using burst if present, otherwise compute from IPreparedModel
736 SharedExecution execution;
737 const bool burstCompute = (burstController != nullptr);
738 if (burstCompute) {
739 // create a reusable burst execution if the controller is not seen before
740 auto burstExecution = mCachedBurstExecutions.find(burstController.get());
741 if (burstExecution == mCachedBurstExecutions.end()) {
742 for (const RuntimeMemory* memory : kMemories) {
743 const auto pool = memory->getMemoryPool();
744 if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
745 auto cacheHold = burstController->cacheMemory(*maybeMemory);
746 memory->hold(cacheHold);
747 }
748 }
749 auto createResult = burstController->createReusableExecution(
750 kRequest, kMeasure, kLoopTimeoutDuration, kMetaData,
751 TypeManager::get()->getExtensionNameAndPrefix(kMetaData));
752 if (!createResult.ok()) {
753 LOG(ERROR) << "IBurst::createReusableExecution() error: "
754 << createResult.error().message;
755 const int n = convertErrorStatusToResultCode(createResult.error().code);
756 return {n, {}, {}};
757 }
758 execution = std::move(createResult).value();
759 mCachedBurstExecutions.emplace(burstController.get(), execution);
760 } else {
761 execution = burstExecution->second;
762 }
763 VLOG(EXECUTION) << "Before mBurstExecution->compute() " << SHOW_IF_DEBUG(kRequest);
764 } else {
765 execution = kExecution;
766 }
767
768 CHECK(execution != nullptr);
769 auto result = execution->compute(deadline);
770 if (!result.ok()) {
771 auto [message, code, returnedOutputShapes] = std::move(result).error();
772 int n = convertErrorStatusToResultCode(code);
773 VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
774 LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
775 << "::execute(...) error: " << message;
776 if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
777 return {n, std::move(returnedOutputShapes), {}};
778 }
779 return {n, {}, {}};
780 }
781
782 VLOG(EXECUTION) << "DriverExecution::compute completed";
783 auto [outputShapes, timing] = std::move(result).value();
784 return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
785 }
786
787 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverExecution::computeFenced(
788 const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
789 const OptionalDuration& timeoutDurationAfterFence) const {
790 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::computeFenced");
791 CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));
792
793 std::vector<SyncFence> waitForHandles;
794 waitForHandles.reserve(waitFor.size());
795 for (int fd : waitFor) {
796 int dupFd = dup(fd);
797 if (dupFd < 0) {
798 LOG(ERROR) << "Unable to dup the file descriptor";
799 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
800 }
801 waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
802 }
803
804 SyncFence syncFence = SyncFence::createAsSignaled();
805 ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
806 Timing timing = {};
807 if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, kDeviceFeatureLevel)) {
808 auto result =
809 kExecution->computeFenced(waitForHandles, deadline, timeoutDurationAfterFence);
810 if (!result.ok()) {
811 LOG(ERROR) << "IExecution::computeFenced() error: " << result.error().message;
812 VLOG(EXECUTION) << "**computeFenced failed**";
813 return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
814 }
815 std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
816 } else {
817 // Fall back to synchronous execution if computeFenced is not supported.
818 // First wait for all sync fences to be ready.
819 LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
820 for (const auto& fence : waitForHandles) {
821 if (!fence.hasFd() || fence.getFd() < 0) {
822 return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
823 }
824 auto r = fence.syncWait({/* no timeout */});
825 if (r != SyncFence::FenceState::SIGNALED) {
826 LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
827 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
828 }
829 }
830 auto result = kExecution->compute(deadline);
831 if (!result.ok()) {
832 LOG(ERROR) << "IExecution::compute() error: " << result.error().message;
833 return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
834 }
835 std::tie(std::ignore, timing) = result.value();
836 }
837
838 int syncFenceFd = -1;
839 if (syncFence.hasFd()) {
840 syncFenceFd = dup(syncFence.getFd());
841 if (syncFenceFd < 0) {
842 LOG(ERROR) << "Failed to dup the file descriptor";
843 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
844 }
845 }
846
847 VLOG(EXECUTION) << "DriverExecution::computeFenced completed";
848 return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
849 }
850
851 static Capabilities createCpuCapabilities() {
852 constexpr Capabilities::PerformanceInfo kPerf = {.execTime = 1.0f, .powerUsage = 1.0f};
853 return makeCapabilities(kPerf, kPerf, kPerf);
854 }
855
856 // A special abstracted device for the CPU. Only one instance of this class will exist.
857 // Use get() to retrieve it.
858 class CpuDevice : public Device {
859 public:
860 // Returns the singleton CPU fallback device.
861 static std::shared_ptr<CpuDevice> get() {
862 static std::shared_ptr<CpuDevice> instance(new CpuDevice);
863 return instance;
864 }
865
866 const std::string& getName() const override { return kName; }
867 const std::string& getVersionString() const override { return kVersionString; }
868 Version getFeatureLevel() const override { return kVersion; }
869 int32_t getType() const override { return ANEURALNETWORKS_DEVICE_CPU; }
870 const std::vector<Extension>& getSupportedExtensions() const override {
871 return kSupportedExtensions;
872 }
873 std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
874 const Capabilities& getCapabilities() const override { return kCapabilities; }
875 Capabilities::PerformanceInfo getPerformance(OperandType) const override {
876 return kPerformance;
877 }
878 Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
879 return kPerformance;
880 }
881 Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
882 return kPerformance;
883 }
884 Capabilities::PerformanceInfo getIfPerformance() const override { return kPerformance; }
885 Capabilities::PerformanceInfo getWhilePerformance() const override { return kPerformance; }
886 std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
887 return {/*numModelCache=*/0, /*numDataCache=*/0};
888 }
889 bool isCachingSupported() const override { return false; }
890 int wait() const override { return ANEURALNETWORKS_NO_ERROR; }
891
892 std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
893 const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
894 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
895 const std::optional<CacheToken>& maybeToken,
896 const std::vector<TokenValuePair>& metaData,
897 const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;
898
899 std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
900 OperandType type) const override;
901
902 private:
903 CpuDevice() = default;
904 const Version kVersion = getRuntimeFeatureLevelVersion();
905 const std::string kName = "nnapi-reference";
906 #ifndef NN_COMPATIBILITY_LIBRARY_BUILD
907 const std::string kVersionString = build::GetBuildNumber();
908 #else
909 const std::string kVersionString = "UNKNOWN";
910 #endif // NN_COMPATIBILITY_LIBRARY_BUILD
911 // Since the performance is a ratio compared to the CPU performance,
912 // by definition the performance of the CPU is 1.0.
913 const Capabilities::PerformanceInfo kPerformance = {.execTime = 1.0f, .powerUsage = 1.0f};
914 const Capabilities kCapabilities = createCpuCapabilities();
915 const std::vector<Extension> kSupportedExtensions{/* No extensions. */};
916 };
917
918 // A special abstracted RuntimePreparedModel for the CPU, constructed by CpuDevice.
919 class CpuPreparedModel : public RuntimePreparedModel {
920 public:
921 // Factory method for CpuPreparedModel. Returns ANEURALNETWORKS_NO_ERROR and
922 // a prepared model object if successfully created. Returns an error code
923 // and nullptr otherwise.
924 static std::pair<int, std::shared_ptr<RuntimePreparedModel>> create(Model model);
925
926 const Device* getDevice() const override { return CpuDevice::get().get(); }
927 SharedPreparedModel getInterface() const override { return nullptr; }
928
929 std::tuple<int, std::vector<OutputShape>, Timing> execute(
930 const std::vector<ModelArgumentInfo>& inputs,
931 const std::vector<ModelArgumentInfo>& outputs,
932 const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
933 MeasureTiming measure, const OptionalTimePoint& deadline,
934 const OptionalDuration& loopTimeoutDuration,
935 const std::vector<TokenValuePair>& metaData) const override;
936
937 GeneralResult<SharedBurst> configureExecutionBurst() const override { return nullptr; }
938
939 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
940 const std::vector<ModelArgumentInfo>& inputs,
941 const std::vector<ModelArgumentInfo>& outputs,
942 const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
943 MeasureTiming measure, const OptionalTimePoint& deadline,
944 const OptionalDuration& loopTimeoutDuration,
945 const OptionalDuration& timeoutDurationAfterFence,
946 const std::vector<TokenValuePair>& metaData) const override;
947
948 std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
949 const std::vector<ModelArgumentInfo>& inputs,
950 const std::vector<ModelArgumentInfo>& outputs,
951 const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
952 const OptionalDuration& loopTimeoutDuration,
953 const std::vector<TokenValuePair>& metaData) const override;
954
955 MemoryPreference getMemoryPreference() const override {
956 return {kPreferredAlignment, kPreferredPadding};
957 }
958
959 // Prefer to use CpuPreparedModel::create.
960 CpuPreparedModel(Model model, std::vector<RunTimePoolInfo> poolInfos)
961 : mModel(std::move(model)), mModelPoolInfos(std::move(poolInfos)) {}
962
963 const Model& getModel() const { return mModel; }
964 const std::vector<RunTimePoolInfo>& getModelPoolInfos() const { return mModelPoolInfos; }
965
966 private:
967 // TFLite kernels prefer 64 bytes for padding and alignment.
968 static constexpr uint32_t kPreferredAlignment = 64;
969 static constexpr uint32_t kPreferredPadding = 64;
970
971 const Model mModel;
972 const std::vector<RunTimePoolInfo> mModelPoolInfos;
973 };
974
975 class CpuExecution : public RuntimeExecution {
976 public:
977 CpuExecution(const CpuPreparedModel& preparedModel, Request request,
978 std::vector<RunTimePoolInfo> requestPoolInfos,
979 OptionalDuration loopTimeoutDuration)
980 : kPreparedModel(preparedModel),
981 kRequest(std::move(request)),
982 kRequestPoolInfos(std::move(requestPoolInfos)),
983 kLoopTimeoutDuration(std::move(loopTimeoutDuration)) {}
984
985 std::tuple<int, std::vector<OutputShape>, Timing> compute(
986 const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;
987
988 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
989 const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
990 const OptionalDuration& timeoutDurationAfterFence) const override;
991
992 private:
993 const CpuPreparedModel& kPreparedModel;
994 Request kRequest;
995 std::vector<RunTimePoolInfo> kRequestPoolInfos;
996 const OptionalDuration kLoopTimeoutDuration;
997 };
998
999 std::vector<bool> CpuDevice::getSupportedOperations(const MetaModel& metaModel) const {
1000 const Model& model = metaModel.getModel();
1001 const size_t count = model.main.operations.size();
1002 std::vector<bool> result(count, false);
1003 for (size_t i = 0; i < count; i++) {
1004 // TODO(b/119870033): Decide whether and how post-P operations would be supported on CPU.
1005 // We may want to use the slicer for CpuDevice just as we do for
1006 // DriverDevice.
1007 OperationType operationType = model.main.operations[i].type;
1008 result[i] = !isExtension(operationType) && operationType != OperationType::OEM_OPERATION;
1009 }
1010 return result;
1011 }
1012
1013 template <typename Type>
1014 static Result<void> validateAndCheckCompliance(const Type& object) {
1015 const auto version = NN_TRY(validate(object));
1016 if (!isCompliantVersion(version, DeviceManager::get()->getRuntimeVersion())) {
1017 return NN_ERROR() << "Object than is newer what is allowed. Version needed: " << version
1018 << ", current runtime version supported: "
1019 << DeviceManager::get()->getRuntimeVersion();
1020 }
1021 return {};
1022 }
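// Used below to reject models, execution preferences, and priorities that would require a
// newer feature level than this runtime build supports.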
1023
1024 std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuDevice::prepareModel(
1025 const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
1026 const OptionalTimePoint& deadline, const CacheInfo& /*cacheInfo*/,
1027 const std::optional<CacheToken>& maybeToken,
1028 const std::vector<TokenValuePair>& /*metaData*/,
1029 const std::vector<ExtensionNameAndPrefix>& /*extensionNameAndPrefix*/) const {
1030 CHECK(!maybeToken.has_value())
1031 << "Should never call prepareModel with cache information on CpuDevice";
1032
1033 const Model model = makeModel();
1034 if (auto result = validateAndCheckCompliance(model); !result.ok()) {
1035 LOG(ERROR) << "Invalid Model: " << result.error();
1036 return {ANEURALNETWORKS_OP_FAILED, nullptr};
1037 }
1038 if (auto result = validateAndCheckCompliance(preference); !result.ok()) {
1039 LOG(ERROR) << "Invalid ExecutionPreference: " << result.error();
1040 return {ANEURALNETWORKS_OP_FAILED, nullptr};
1041 }
1042 if (auto result = validateAndCheckCompliance(priority); !result.ok()) {
1043 LOG(ERROR) << "Invalid Priority: " << result.error();
1044 return {ANEURALNETWORKS_OP_FAILED, nullptr};
1045 }
1046 if (hasDeadlinePassed(deadline)) {
1047 return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, nullptr};
1048 }
1049
1050 return CpuPreparedModel::create(model);
1051 }
1052
1053 std::pair<int, std::unique_ptr<RuntimeMemory>> CpuDevice::allocate(const MemoryDescriptor& desc,
1054 OperandType type) const {
1055 uint32_t size = TypeManager::get()->getSizeOfData(type, desc.dimensions);
1056 if (size == 0) {
1057 LOG(ERROR) << "CpuDevice::allocate -- does not support unknown dimensions.";
1058 return {ANEURALNETWORKS_OP_FAILED, nullptr};
1059 }
1060 return MemoryAshmem::create(size);
1061 }
1062
1063 std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuPreparedModel::create(Model model) {
1064 std::vector<RunTimePoolInfo> poolInfos;
1065 if (!setRunTimePoolInfosFromCanonicalMemories(&poolInfos, model.pools)) {
1066 return {ANEURALNETWORKS_UNMAPPABLE, nullptr};
1067 }
1068
1069 std::shared_ptr<RuntimePreparedModel> preparedModel =
1070 std::make_shared<CpuPreparedModel>(std::move(model), std::move(poolInfos));
1071 return {ANEURALNETWORKS_NO_ERROR, std::move(preparedModel)};
1072 }
1073
1074 static std::tuple<int, std::vector<OutputShape>, Timing> computeOnCpu(
1075 const Model& model, const Request& request,
1076 const std::vector<RunTimePoolInfo>& modelPoolInfos,
1077 const std::vector<RunTimePoolInfo>& requestPoolInfos, const OptionalTimePoint& deadline,
1078 const OptionalDuration& loopTimeoutDuration) {
1079 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "computeOnCpu");
1080 CpuExecutor executor;
1081 if (loopTimeoutDuration.has_value()) {
1082 executor.setLoopTimeout(loopTimeoutDuration->count());
1083 }
1084 if (deadline.has_value()) {
1085 executor.setDeadline(*deadline);
1086 }
1087 int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
1088 const auto& outputShapes = executor.getOutputShapes();
1089 return {err, outputShapes, {}};
1090 }
1091
1092 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuPreparedModel::executeFenced(
1093 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
1094 const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
1095 MeasureTiming measure, const OptionalTimePoint& deadline,
1096 const OptionalDuration& loopTimeoutDuration, const OptionalDuration& duration,
1097 const std::vector<TokenValuePair>& /*metaData*/) const {
1098 VLOG(EXECUTION)
1099 << "CpuPreparedModel::executeFenced wait for sync fences to signal before execution";
1100 for (int syncFd : waitFor) {
1101 if (syncFd > 0) {
1102 auto r = syncWait(syncFd, -1);
1103 if (r != FenceState::SIGNALED) {
1104 LOG(ERROR) << "sync wait failed, fd: " << syncFd;
1105 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
1106 }
1107 }
1108 }
1109
1110 // Update deadline if the timeout duration is closer than the deadline.
1111 auto closestDeadline = deadline;
1112 if (duration.has_value()) {
1113 const auto timeoutDurationDeadline = makeDeadline(*duration);
1114 if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
1115 closestDeadline = timeoutDurationDeadline;
1116 }
1117 }
1118
1119 const auto [result, outputShapes, timing] = execute(inputs, outputs, memories, nullptr, measure,
1120 closestDeadline, loopTimeoutDuration, {});
1121 return {result, -1, nullptr, timing};
1122 }
1123
1124 static std::tuple<int, Request, std::vector<RunTimePoolInfo>> createCpuRequest(
1125 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
1126 const std::vector<const RuntimeMemory*>& memories) {
1127 std::vector<RunTimePoolInfo> requestPoolInfos;
1128 requestPoolInfos.reserve(memories.size());
1129 for (const RuntimeMemory* mem : memories) {
1130 if (std::optional<RunTimePoolInfo> poolInfo = mem->getRunTimePoolInfo()) {
1131 requestPoolInfos.emplace_back(*poolInfo);
1132 } else {
1133 return {ANEURALNETWORKS_UNMAPPABLE, {}, {}};
1134 }
1135 }
1136 // Create one additional pool for each input/output that is specified by a pointer.
1137 auto fixPointerArguments =
1138 [&requestPoolInfos](const std::vector<ModelArgumentInfo>& argumentInfos) {
1139 std::vector<DataLocation> ptrArgsLocations;
1140 for (const ModelArgumentInfo& argumentInfo : argumentInfos) {
1141 if (argumentInfo.state() == ModelArgumentInfo::POINTER) {
1142 ptrArgsLocations.push_back(
1143 {.poolIndex = static_cast<uint32_t>(requestPoolInfos.size()),
1144 .offset = 0,
1145 .length = argumentInfo.length(),
1146 .padding = argumentInfo.padding()});
1147 requestPoolInfos.emplace_back(RunTimePoolInfo::createFromExistingBuffer(
1148 static_cast<uint8_t*>(argumentInfo.buffer())));
1149 }
1150 }
1151 return ptrArgsLocations;
1152 };
1153 const std::vector<DataLocation> inputPtrArgsLocations = fixPointerArguments(inputs);
1154 const std::vector<DataLocation> outputPtrArgsLocations = fixPointerArguments(outputs);
1155
1156 Request request;
1157 request.inputs = createRequestArguments(inputs, inputPtrArgsLocations);
1158 request.outputs = createRequestArguments(outputs, outputPtrArgsLocations);
1159 return {ANEURALNETWORKS_NO_ERROR, std::move(request), std::move(requestPoolInfos)};
1160 }
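// Illustrative example: with one caller-provided memory and two POINTER inputs plus one
// POINTER output, the request ends up referencing pools {0: caller memory, 1: first input
// buffer, 2: second input buffer, 3: output buffer}; each pointer pool wraps the caller's
// buffer in place, so no copy is made here.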
1161
1162 // Perform computation on the NNAPI CPU reference implementation.
1163 //
1164 // Unlike DriverPreparedModel::execute, the NNAPI CPU reference executor lives in the same
1165 // process as the NNAPI runtime and can take raw pointers. This method therefore creates one
1166 // extra pool for each pointer-specified input/output to avoid copying the data.
1167 //
1168 // Will choose between sync/async execution according to DeviceManager::mSyncExecCpu.
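//
// When built with NN_DEBUGGABLE, mSyncExecCpu can be flipped at run time through the
// debug.nn.syncexec-cpu system property read in DeviceManager::DeviceManager() below.
// Illustrative only:
//   adb shell setprop debug.nn.syncexec-cpu 0   # take the separate-thread path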
std::tuple<int, std::vector<OutputShape>, Timing> CpuPreparedModel::execute(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const SharedBurst& /*burstController*/,
        MeasureTiming /*measure*/, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    if (hasDeadlinePassed(deadline)) {
        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
    }

    int nCreateRequest;
    Request request;
    std::vector<RunTimePoolInfo> requestPoolInfos;
    std::tie(nCreateRequest, request, requestPoolInfos) =
            createCpuRequest(inputs, outputs, memories);
    if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
        return {nCreateRequest, {}, {}};
    }

    if (!DeviceManager::get()->syncExecCpu()) {
        // TODO: use a thread pool
        // TODO(mikie): this could have NNTRACE so we could measure the overhead
        // of spinning up a new thread.
        std::tuple<int, std::vector<OutputShape>, Timing> result = {};
        std::thread([this, &request, &requestPoolInfos, &deadline, &loopTimeoutDuration, &result] {
            result = computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
                                  loopTimeoutDuration);
        }).join();
        return result;
    }

    return computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
                        loopTimeoutDuration);
}

std::pair<int, std::shared_ptr<RuntimeExecution>> CpuPreparedModel::createReusableExecution(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, MeasureTiming /*measure*/,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    auto [nCreateRequest, request, requestPoolInfos] = createCpuRequest(inputs, outputs, memories);
    if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
        return {nCreateRequest, nullptr};
    }
    auto execution = std::make_shared<CpuExecution>(
            *this, std::move(request), std::move(requestPoolInfos), loopTimeoutDuration);
    return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
}
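// Minimal usage sketch for the reusable-execution path (illustrative only; variable names are
// hypothetical): create the CpuExecution once, then call compute() on it as many times as needed.
//
//   auto [err, execution] = cpuPreparedModel->createReusableExecution(
//           inputs, outputs, memories, MeasureTiming::NO, /*loopTimeoutDuration=*/{},
//           /*metaData=*/{});
//   if (err == ANEURALNETWORKS_NO_ERROR) {
//       const auto [status, outputShapes, timing] =
//               execution->compute(/*burstController=*/nullptr, /*deadline=*/{});
//   }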

std::tuple<int, std::vector<OutputShape>, Timing> CpuExecution::compute(
        const SharedBurst& /*burstController*/, const OptionalTimePoint& deadline) const {
    if (hasDeadlinePassed(deadline)) {
        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
    }

    if (!DeviceManager::get()->syncExecCpu()) {
        // TODO: use a thread pool
        // TODO(mikie): this could have NNTRACE so we could measure the overhead
        // of spinning up a new thread.
        std::tuple<int, std::vector<OutputShape>, Timing> result = {};
        std::thread([this, &deadline, &result] {
            result = computeOnCpu(kPreparedModel.getModel(), kRequest,
                                  kPreparedModel.getModelPoolInfos(), kRequestPoolInfos, deadline,
                                  kLoopTimeoutDuration);
        }).join();
        return result;
    }

    return computeOnCpu(kPreparedModel.getModel(), kRequest, kPreparedModel.getModelPoolInfos(),
                        kRequestPoolInfos, deadline, kLoopTimeoutDuration);
}

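// Fenced variant of compute(): waits on every sync fence in `waitFor`, tightens the deadline with
// the optional timeout duration, then runs compute() synchronously. Because the CPU path has
// already finished by the time this returns, no output sync fence or callback is produced
// (-1 and nullptr are returned instead).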
std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuExecution::computeFenced(
        const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
        const OptionalDuration& duration) const {
    VLOG(EXECUTION)
            << "CpuExecution::computeFenced wait for sync fences to signal before execution";
    for (int syncFd : waitFor) {
        if (syncFd > 0) {
            auto r = syncWait(syncFd, -1);
            if (r != FenceState::SIGNALED) {
                LOG(ERROR) << "sync wait failed, fd: " << syncFd;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
    }

    // Update deadline if the timeout duration is closer than the deadline.
    auto closestDeadline = deadline;
    if (duration.has_value()) {
        const auto timeoutDurationDeadline = makeDeadline(*duration);
        if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
            closestDeadline = timeoutDurationDeadline;
        }
    }

    const auto [result, outputShapes, timing] = compute(nullptr, closestDeadline);
    return {result, -1, nullptr, timing};
}

int64_t DeviceManager::getRuntimeFeatureLevel() const {
    return versionToFeatureLevel(mRuntimeVersion.level);
}

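// Returns the process-wide DeviceManager singleton; the instance is constructed lazily (and
// thread-safely, per C++11 static-local initialization rules) on first use.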
DeviceManager* DeviceManager::get() {
    static DeviceManager manager;
    return &manager;
}

std::shared_ptr<Device> DeviceManager::getCpuDevice() {
    return CpuDevice::get();
}

std::shared_ptr<Device> DeviceManager::forTest_makeDriverDevice(const SharedDevice& device) {
    VLOG(MANAGER) << "forTest_makeDriverDevice(" << device->getName() << ")";
    const auto driverDevice = DriverDevice::create(device);
    CHECK(driverDevice != nullptr);
    return driverDevice;
}

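// getDriverDevices comes in two flavors: the regular build discovers driver devices through the
// NN HAL services (and returns nothing on non-Android hosts), whereas the compatibility-library
// build (NN_COMPATIBILITY_LIBRARY_BUILD) obtains them from the getDevices() entry point provided
// by that build.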
#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
        [[maybe_unused]] Version::Level maxFeatureLevelAllowed) {
#ifdef __ANDROID__
    auto devices = hardware::neuralnetworks::service::getDevices(maxFeatureLevelAllowed);

    std::vector<std::shared_ptr<DriverDevice>> driverDevices;
    driverDevices.reserve(devices.size());
    for (auto& device : devices) {
        driverDevices.push_back(DriverDevice::create(std::move(device)));
    }
    return driverDevices;
#else   // __ANDROID__
    return {};
#endif  // __ANDROID__
}
#else
std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
        Version::Level /*maxFeatureLevelAllowed*/) {
    auto devices = getDevices();
    std::vector<std::shared_ptr<DriverDevice>> driverDevices;
    driverDevices.reserve(devices.size());
    for (auto& device : devices) {
        driverDevices.push_back(DriverDevice::create(std::move(device)));
    }
    return driverDevices;
}
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD

void DeviceManager::findAvailableDevices() {
    VLOG(MANAGER) << "findAvailableDevices";

#ifdef NN_DEBUGGABLE
    // debug.nn.enabled-devices defines a regex pattern. Of all available driver devices, only
    // those whose names match the pattern are enabled; driver devices with non-matching names are
    // ignored. If this property is not set, all available driver devices are enabled by default.
    // This filter only applies to driver devices; nnapi-reference is always enabled.
    std::string patternStr = base::GetProperty("debug.nn.enabled-devices", ".*");
    LOG(INFO) << "Enabled devices: " << patternStr;
    const std::regex pattern(patternStr);
#endif  // NN_DEBUGGABLE
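    // Illustrative only (NN_DEBUGGABLE builds): to restrict the runtime to drivers whose names
    // match a pattern, set the property before the process initializes NNAPI, e.g.
    //   adb shell setprop debug.nn.enabled-devices "my-sample-driver.*"
    // where "my-sample-driver" stands in for a real driver name.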

    // register driver devices
    auto driverDevices = getDriverDevices(mRuntimeVersion.level);
    for (auto& driverDevice : driverDevices) {
#ifdef NN_DEBUGGABLE
        if (!std::regex_match(driverDevice->getName(), pattern)) {
            LOG(INFO) << "Ignored interface " << driverDevice->getName()
                      << " (version = " << driverDevice->getVersionString() << ")";
            continue;
        }
#endif  // NN_DEBUGGABLE
        LOG(INFO) << "Found interface " << driverDevice->getName()
                  << " (version = " << driverDevice->getVersionString() << ")";
        mDevices.push_back(std::move(driverDevice));
    }

#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
    // register CPU fallback device
    mDevices.push_back(CpuDevice::get());
    mDevicesCpuOnly.push_back(CpuDevice::get());
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD
}

void DeviceManager::registerDevice(const SharedDevice& device) {
    if (auto driverDevice = DriverDevice::create(device)) {
        mDevices.push_back(std::move(driverDevice));
    }
}

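// The constructor determines the runtime feature level and telemetry setting, then enumerates the
// available devices. On NN_DEBUGGABLE builds it additionally honors a few debug system properties
// (debug.nn.strict-slicing, debug.nn.partition, debug.nn.cpuonly, debug.nn.syncexec-cpu,
// debug.nn.syncexec-runtime) that tweak partitioning and execution behavior for testing.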
DeviceManager::DeviceManager() {
    VLOG(MANAGER) << "DeviceManager::DeviceManager";
    mRuntimeVersion = getRuntimeFeatureLevelVersion();
    mIsPlatformTelemetryEnabled = getWhetherPlatformTelemetryIsEnabled();
    findAvailableDevices();
#ifdef NN_DEBUGGABLE
    mStrictSlicing = (getProp("debug.nn.strict-slicing") != 0);
    mPartitioning = getProp("debug.nn.partition", kPartitioningDefault);
    mDebugNNCpuOnly = (getProp("debug.nn.cpuonly") != 0);
    mSyncExecCpu = (getProp("debug.nn.syncexec-cpu", 1) != 0);
    mSyncExecRuntime = (getProp("debug.nn.syncexec-runtime") != 0);
#endif  // NN_DEBUGGABLE
}

}  // namespace nn
}  // namespace android