1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "Manager"
18 
19 #include "Manager.h"
20 
21 #include <CpuExecutor.h>
22 #include <LegacyUtils.h>
23 #include <MetaModel.h>
24 #include <Tracing.h>
25 #include <android-base/properties.h>
26 #include <nnapi/IBurst.h>
27 #include <nnapi/IDevice.h>
28 #include <nnapi/IExecution.h>
29 #include <nnapi/IPreparedModel.h>
30 #include <nnapi/SharedMemory.h>
31 #include <nnapi/TypeUtils.h>
32 #include <nnapi/Types.h>
33 #include <nnapi/Validation.h>
34 
35 #include <algorithm>
36 #include <functional>
37 #include <iterator>
38 #include <map>
39 #include <memory>
40 #include <regex>
41 #include <set>
42 #include <string>
43 #include <tuple>
44 #include <utility>
45 #include <vector>
46 
47 #include "ExecutionCallback.h"
48 #include "Memory.h"
49 #include "ModelArgumentInfo.h"
50 #include "ServerFlag.h"
51 #include "TypeManager.h"
52 
53 #ifndef NN_COMPATIBILITY_LIBRARY_BUILD
54 #include <build/version.h>
55 #include <cutils/native_handle.h>
56 #include <nnapi/hal/1.3/Buffer.h>
57 #include <nnapi/hal/Service.h>
58 #endif  // NN_COMPATIBILITY_LIBRARY_BUILD
59 
60 #ifdef NN_EXPERIMENTAL_FEATURE
61 #include "NeuralNetworksExperimentalFeatures.h"
62 #endif  // NN_EXPERIMENTAL_FEATURE
63 
64 namespace android {
65 namespace nn {
66 namespace {
67 
68 Version getRuntimeFeatureLevelVersionHelper() {
69 #if defined(NN_EXPERIMENTAL_FEATURE) && defined(NN_COMPATIBILITY_LIBRARY_BUILD)
70 #error "NN_EXPERIMENTAL_FEATURE is not supported when NN_COMPATIBILITY_LIBRARY_BUILD is defined"
71 #elif defined(NN_EXPERIMENTAL_FEATURE)
72     auto version = kVersionFeatureLevelExperimental;
73     // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
74     // features that are only available in the runtime.
75     version.runtimeOnlyFeatures = true;
76 #elif defined(NN_COMPATIBILITY_LIBRARY_BUILD)
77     auto version = serverFeatureLevelToVersion(kMaxFeatureLevelNum);
78 #else   // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
79     auto version = serverFeatureLevelToVersion(getServerFeatureLevelFlag());
80     // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
81     // features that are only available in the runtime.
82     version.runtimeOnlyFeatures = true;
83 #endif  // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
84     return version;
85 }
86 
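// The runtime feature level is computed once and cached in a function-local static, so it
// stays constant for the lifetime of the process.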
87 Version getRuntimeFeatureLevelVersion() {
88     static const Version version = getRuntimeFeatureLevelVersionHelper();
89     return version;
90 }
91 
92 bool getWhetherPlatformTelemetryIsEnabled() {
93 #if !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
94     return getServerTelemetryEnableFlag();
95 #else   // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
96     return false;
97 #endif  // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
98 }
99 
100 }  // namespace
101 
102 // A Device with an actual underlying driver.
103 class DriverDevice : public Device {
104    public:
105     // Create a DriverDevice from a SharedDevice.
106     // Returns nullptr on failure.
107     static std::shared_ptr<DriverDevice> create(SharedDevice device);
108 
109     // Prefer using DriverDevice::create
110     explicit DriverDevice(SharedDevice device);
111 
112     const std::string& getName() const override { return kInterface->getName(); }
113     const std::string& getVersionString() const override { return kInterface->getVersionString(); }
114     Version getFeatureLevel() const override { return kInterface->getFeatureLevel(); }
115     int32_t getType() const override { return static_cast<int32_t>(kInterface->getType()); }
116     const std::vector<Extension>& getSupportedExtensions() const override {
117         return kInterface->getSupportedExtensions();
118     }
119     std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
120     const Capabilities& getCapabilities() const override { return kInterface->getCapabilities(); }
121     Capabilities::PerformanceInfo getPerformance(OperandType type) const override {
122         return getCapabilities().operandPerformance.lookup(type);
123     }
124     Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
125         return getCapabilities().relaxedFloat32toFloat16PerformanceScalar;
126     }
127     Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
128         return getCapabilities().relaxedFloat32toFloat16PerformanceTensor;
129     }
130     Capabilities::PerformanceInfo getIfPerformance() const override {
131         return getCapabilities().ifPerformance;
132     }
133     Capabilities::PerformanceInfo getWhilePerformance() const override {
134         return getCapabilities().whilePerformance;
135     }
136     std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
137         return kInterface->getNumberOfCacheFilesNeeded();
138     }
139     bool isCachingSupported() const override {
140         // Caching is supported if either numModelCache or numDataCache is greater than 0.
141         const auto [numModelCacheFiles, numDataCacheFiles] = getNumberOfCacheFilesNeeded();
142         return numModelCacheFiles > 0 || numDataCacheFiles > 0;
143     }
144     int wait() const override {
145         auto result = kInterface->wait();
146         if (!result.ok()) {
147             LOG(ERROR) << "DriverDevice::wait error: " << result.error().message;
148             return convertErrorStatusToResultCode(result.error().code);
149         }
150         return ANEURALNETWORKS_NO_ERROR;
151     }
152 
153     std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
154             const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
155             const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
156             const std::optional<CacheToken>& maybeToken,
157             const std::vector<TokenValuePair>& metaData,
158             const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;
159 
160     std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
161                                                             OperandType) const override;
162 
163    private:
164     const SharedDevice kInterface;
165 
166     GeneralResult<std::vector<bool>> getSupportedOperationsImpl(const MetaModel& metaModel) const;
167     GeneralResult<SharedPreparedModel> prepareModelFromCacheInternal(
168             const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
169             const CacheToken& token) const;
170 
171 #ifdef NN_DEBUGGABLE
172     // For debugging: behavior of IDevice::getSupportedOperations for SampleDriver.
173     // 0 - all operations reported by IDevice::getSupportedOperations() supported
174     // 1 - some operations reported by IDevice::getSupportedOperations() supported
175     uint32_t mSupported = 0;
176 #endif  // NN_DEBUGGABLE
177 };
178 
179 // A RuntimePreparedModel backed by an IPreparedModel instance returned by the actual driver.
180 class DriverPreparedModel : public RuntimePreparedModel {
181    public:
182     DriverPreparedModel(const Device* device, const SharedPreparedModel& preparedModel)
183         : mDevice(device), mPreparedModel(preparedModel) {
184         CHECK(mDevice != nullptr);
185         CHECK(mPreparedModel != nullptr);
186     }
187 
188     const Device* getDevice() const override { return mDevice; }
189     SharedPreparedModel getInterface() const override { return mPreparedModel; }
190 
191     std::tuple<int, std::vector<OutputShape>, Timing> execute(
192             const std::vector<ModelArgumentInfo>& inputs,
193             const std::vector<ModelArgumentInfo>& outputs,
194             const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
195             MeasureTiming measure, const OptionalTimePoint& deadline,
196             const OptionalDuration& loopTimeoutDuration,
197             const std::vector<TokenValuePair>& metaData) const override;
198 
199     std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
200             const std::vector<ModelArgumentInfo>& inputs,
201             const std::vector<ModelArgumentInfo>& outputs,
202             const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
203             MeasureTiming measure, const OptionalTimePoint& deadline,
204             const OptionalDuration& loopTimeoutDuration,
205             const OptionalDuration& timeoutDurationAfterFence,
206             const std::vector<TokenValuePair>& metaData) const override;
207 
208     std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
209             const std::vector<ModelArgumentInfo>& inputs,
210             const std::vector<ModelArgumentInfo>& outputs,
211             const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
212             const OptionalDuration& loopTimeoutDuration,
213             const std::vector<TokenValuePair>& metaData) const override;
214 
215     GeneralResult<SharedBurst> configureExecutionBurst() const override {
216         return mPreparedModel->configureExecutionBurst();
217     }
218 
219     MemoryPreference getMemoryPreference() const override {
220         if (isCompliantVersion(kVersionFeatureLevel5, mDevice->getFeatureLevel())) {
221             return {kDefaultRequestMemoryAlignment, kDefaultRequestMemoryPadding};
222         } else {
223             // We are not able to pass memory padding information to HIDL drivers, so return the
224             // minimum padding.
225             return {kDefaultRequestMemoryAlignment, kMinMemoryPadding};
226         }
227     }
228 
229    private:
230     const Device* mDevice;
231     const SharedPreparedModel mPreparedModel;
232 };
233 
234 class DriverExecution : public RuntimeExecution {
235    public:
236     DriverExecution(SharedExecution execution, Request request,
237                     std::vector<const RuntimeMemory*> memories, MeasureTiming measure,
238                     OptionalDuration loopTimeoutDuration, Version deviceFeatureLevel,
239                     const std::vector<TokenValuePair>& metaData)
240         : kExecution(std::move(execution)),
241           kRequest(std::move(request)),
242           kMemories(std::move(memories)),
243           kMeasure(measure),
244           kLoopTimeoutDuration(std::move(loopTimeoutDuration)),
245           kDeviceFeatureLevel(deviceFeatureLevel),
246           kMetaData(metaData) {
247         CHECK(kExecution != nullptr);
248     }
249 
250     std::tuple<int, std::vector<OutputShape>, Timing> compute(
251             const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;
252 
253     std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
254             const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
255             const OptionalDuration& timeoutDurationAfterFence) const override;
256 
257    private:
258     const SharedExecution kExecution;
259 
260     // For burst execution.
261     const Request kRequest;
262     const std::vector<const RuntimeMemory*> kMemories;
263     const MeasureTiming kMeasure;
264     const OptionalDuration kLoopTimeoutDuration;
265     mutable std::map<const IBurst*, SharedExecution> mCachedBurstExecutions;
266 
267     // For fenced execution.
268     const Version kDeviceFeatureLevel;
269 
270     // Execution metadata.
271     std::vector<TokenValuePair> kMetaData;
272 };
273 
274 DriverDevice::DriverDevice(SharedDevice device) : kInterface(std::move(device)) {
275     CHECK(kInterface != nullptr);
276 #ifdef NN_DEBUGGABLE
277     static const char samplePrefix[] = "sample";
278     if (getName().substr(0, sizeof(samplePrefix) - 1) == samplePrefix) {
279         mSupported = getProp("debug.nn.sample.supported");
280     }
281 #endif  // NN_DEBUGGABLE
282 }
283 
284 std::shared_ptr<DriverDevice> DriverDevice::create(SharedDevice device) {
285     if (device == nullptr) {
286         LOG(ERROR) << "DriverDevice::create called with nullptr";
287         return nullptr;
288     }
289 
290     return std::make_shared<DriverDevice>(std::move(device));
291 }
292 
293 int64_t DeviceManager::versionToFeatureLevel(Version::Level versionLevel) {
294     switch (versionLevel) {
295         case Version::Level::FEATURE_LEVEL_1:
296             return ANEURALNETWORKS_FEATURE_LEVEL_1;
297         case Version::Level::FEATURE_LEVEL_2:
298             return ANEURALNETWORKS_FEATURE_LEVEL_2;
299         case Version::Level::FEATURE_LEVEL_3:
300             return ANEURALNETWORKS_FEATURE_LEVEL_3;
301         case Version::Level::FEATURE_LEVEL_4:
302             return ANEURALNETWORKS_FEATURE_LEVEL_4;
303         case Version::Level::FEATURE_LEVEL_5:
304             return ANEURALNETWORKS_FEATURE_LEVEL_5;
305         case Version::Level::FEATURE_LEVEL_6:
306             return ANEURALNETWORKS_FEATURE_LEVEL_6;
307         case Version::Level::FEATURE_LEVEL_7:
308             return ANEURALNETWORKS_FEATURE_LEVEL_7;
309         case Version::Level::FEATURE_LEVEL_8:
310             return ANEURALNETWORKS_FEATURE_LEVEL_8;
311 #ifdef NN_EXPERIMENTAL_FEATURE
312         case Version::Level::FEATURE_LEVEL_EXPERIMENTAL:
313             return ANEURALNETWORKS_FEATURE_LEVEL_EXPERIMENTAL;
314 #endif  // NN_EXPERIMENTAL_FEATURE
315     }
316     LOG(FATAL) << "Unrecognized version " << versionLevel;
317     return -1;
318 }
319 
320 GeneralResult<std::vector<bool>> DriverDevice::getSupportedOperationsImpl(
321         const MetaModel& metaModel) const {
322     const auto featureLevel = kInterface->getFeatureLevel();
323     const auto slice = metaModel.getSlice(featureLevel);
324     if (!slice.has_value()) {
325         return NN_ERROR() << "getSlice(" << featureLevel << ") failed";
326     }
327 
328     const auto& [sliceModel, slicedModelOperationIndexToModelOperationIndex] = *slice;
329     const std::vector<bool> supported = NN_TRY(kInterface->getSupportedOperations(sliceModel));
330     const uint32_t slicedOperationCount = sliceModel.main.operations.size();
331     if (supported.size() != slicedOperationCount) {
332         return NN_ERROR() << "IDevice::getSupportedOperations returned a vector of length "
333                           << supported.size() << " when expecting " << slicedOperationCount;
334     }
335 
336     const Model& model = metaModel.getModel();
337     const uint32_t operationCount = model.main.operations.size();
338     std::vector<bool> remappedSupported(operationCount, false);
339     for (size_t i = 0; i < supported.size(); ++i) {
340         if (supported[i]) {
341             remappedSupported[slicedModelOperationIndexToModelOperationIndex(i)] = true;
342         }
343     }
344     return remappedSupported;
345 }
346 
347 std::vector<bool> DriverDevice::getSupportedOperations(const MetaModel& metaModel) const {
348     const Model& model = metaModel.getModel();
349 
350     auto result = getSupportedOperationsImpl(metaModel);
351     if (!result.ok()) {
352         LOG(ERROR) << "getSupportedOperations failed with code " << result.error().code << ": "
353                    << result.error().message;
354         // Set the supported operation vectors to all false, so we won't use this driver.
355         return std::vector<bool>(model.main.operations.size(), false);
356     }
357 
358     std::vector<bool>& supportedOperations = result.value();
359 #ifdef NN_DEBUGGABLE
360     if (mSupported != 1) {
361         return supportedOperations;
362     }
363 
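    // Debug-only filtering (mSupported == 1): deterministically drop a subset of the operations
    // the driver claims to support by hashing the device name together with each operation's
    // type, operand types, dimensions, and constant-ness, and rejecting the operation when the
    // resulting hash is odd.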
364     const uint32_t baseAccumulator = std::hash<std::string>{}(getName());
365     for (size_t operationIndex = 0; operationIndex < supportedOperations.size(); operationIndex++) {
366         if (!supportedOperations[operationIndex]) {
367             continue;
368         }
369 
370         uint32_t accumulator = baseAccumulator;
371         const Operation& operation = model.main.operations[operationIndex];
372         accumulator ^= static_cast<uint32_t>(operation.type);
373         auto accumulateOperands = [&model, &accumulator](const std::vector<uint32_t>& operands) {
374             for (uint32_t operandIndex : operands) {
375                 const Operand& operand = model.main.operands[operandIndex];
376                 accumulator ^= static_cast<uint32_t>(operand.type);
377                 accumulator ^= operand.dimensions.size();
378                 for (const Dimension& dimension : operand.dimensions) {
379                     accumulator ^= dimension;
380                     if (operand.lifetime == Operand::LifeTime::CONSTANT_COPY ||
381                         operand.lifetime == Operand::LifeTime::CONSTANT_REFERENCE ||
382                         operand.lifetime == Operand::LifeTime::POINTER) {
383                         accumulator ^= 1;
384                     }
385                 }
386             }
387         };
388         accumulateOperands(operation.inputs);
389         accumulateOperands(operation.outputs);
390         if (accumulator & 1) {
391             supportedOperations[operationIndex] = false;
392         }
393     }
394 #endif  // NN_DEBUGGABLE
395 
396     return supportedOperations;
397 }
398 
399 // Opens a cache file for reading and writing and returns a shared handle.
400 static GeneralResult<SharedHandle> createCacheHandle(const std::string& filename,
401                                                      bool createIfNotExist) {
402     auto fd = base::unique_fd(open(filename.c_str(), createIfNotExist ? (O_RDWR | O_CREAT) : O_RDWR,
403                                    S_IRUSR | S_IWUSR));
404     if (!fd.ok()) {
405         return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
406                << "Failed to " << (createIfNotExist ? "open or create" : "open") << " cache file "
407                << filename;
408     }
409     return std::make_shared<const Handle>(std::move(fd));
410 }
411 
412 // Opens a list of cache files and returns a vector of shared handles. The files
413 // are always opened with both read and write permissions.
414 static GeneralResult<std::vector<SharedHandle>> createCacheHandleVec(
415         uint32_t numCacheFiles, const std::string& baseFilename, bool createIfNotExist) {
416     CHECK(numCacheFiles <= kMaxNumberOfCacheFiles);
417     std::vector<SharedHandle> handles;
418     handles.reserve(numCacheFiles);
419     for (uint32_t i = 0; i < numCacheFiles; i++) {
420         std::string filename = baseFilename + std::to_string(i);
421         VLOG(COMPILATION) << "Cache " << i << ": " << filename;
422         handles.push_back(NN_TRY(createCacheHandle(filename, createIfNotExist)));
423     }
424     return handles;
425 }
426 
427 // Maps a token to cache file names and returns a pair of vectors of shared
428 // handles to the opened files.
429 static GeneralResult<CacheHandles> getCacheHandles(
430         const CacheInfo& cacheInfo, const CacheToken& token,
431         const std::pair<uint32_t, uint32_t>& numCacheFiles, bool createIfNotExist) {
432     if (const auto* cacheHandles = std::get_if<CacheHandles>(&cacheInfo.variant)) {
433         if (cacheHandles->modelCache.size() != numCacheFiles.first) {
434             return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
435                    << "Expected " << numCacheFiles.first << " model cache handles, got "
436                    << cacheHandles->modelCache.size();
437         }
438         if (cacheHandles->dataCache.size() != numCacheFiles.second) {
439             return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
440                    << "Expected " << numCacheFiles.second << " data cache handles, got "
441                    << cacheHandles->dataCache.size();
442         }
443         return *cacheHandles;
444     }
445 
446     // The filename includes kByteSizeOfCacheToken * 2 characters for token,
447     // and 1 character for model/data cache identifier.
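    // Each token byte is written as two characters in the range 'A'..'P': the low nibble first,
    // then the high nibble, which keeps the generated file name filesystem-safe.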
448     std::string filename(kByteSizeOfCacheToken * 2 + 1, '0');
449     for (uint32_t i = 0; i < kByteSizeOfCacheToken; i++) {
450         filename[i * 2] = 'A' + (token[i] & 0x0F);
451         filename[i * 2 + 1] = 'A' + (token[i] >> 4);
452     }
453 
454     const auto& cacheDir = std::get<CacheDir>(cacheInfo.variant);
455     CHECK(cacheDir.empty() || cacheDir.back() == '/');
456     std::string cacheFileName = cacheDir + filename;
457     const uint32_t cacheTypeIdentifierIndex = cacheDir.size() + kByteSizeOfCacheToken * 2;
458 
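    // The character following the token selects the cache type ('1' for model cache, '2' for
    // data cache); createCacheHandleVec then appends the file index for each cache file.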
459     cacheFileName[cacheTypeIdentifierIndex] = '1';
460     std::vector<SharedHandle> modelCache =
461             NN_TRY(createCacheHandleVec(numCacheFiles.first, cacheFileName, createIfNotExist));
462 
463     cacheFileName[cacheTypeIdentifierIndex] = '2';
464     std::vector<SharedHandle> dataCache =
465             NN_TRY(createCacheHandleVec(numCacheFiles.second, cacheFileName, createIfNotExist));
466 
467     return CacheHandles{
468             .modelCache = std::move(modelCache),
469             .dataCache = std::move(dataCache),
470     };
471 }
472 
473 GeneralResult<SharedPreparedModel> DriverDevice::prepareModelFromCacheInternal(
474         const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
475         const CacheToken& token) const {
476     // Get cache files if they exist, otherwise return from the function early.
477     auto cache = NN_TRY(getCacheHandles(cacheInfo, token, kInterface->getNumberOfCacheFilesNeeded(),
478                                         /*createIfNotExist=*/false));
479     return kInterface->prepareModelFromCache(deadline, cache.modelCache, cache.dataCache, token);
480 }
481 
482 std::pair<int, std::shared_ptr<RuntimePreparedModel>> DriverDevice::prepareModel(
483         const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
484         const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
485         const std::optional<CacheToken>& maybeToken, const std::vector<TokenValuePair>& metaData,
486         const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const {
487     // Attempt to compile from cache if token is present.
488     if (maybeToken.has_value()) {
489         auto result = prepareModelFromCacheInternal(deadline, cacheInfo, *maybeToken);
490         if (result.has_value()) {
491             LOG(INFO) << "prepareModelFromCache: successfully prepared model from cache";
492             return {ANEURALNETWORKS_NO_ERROR,
493                     std::make_shared<DriverPreparedModel>(this, std::move(result).value())};
494         } else {
495             LOG(ERROR) << "prepareModelFromCache failure (" << result.error().code
496                        << "): " << result.error().message;
497         }
498     }
499 
500     // Get cache files if they exist, otherwise create them.
501     CacheHandles cache;
502     if (maybeToken.has_value()) {
503         auto result =
504                 getCacheHandles(cacheInfo, *maybeToken, kInterface->getNumberOfCacheFilesNeeded(),
505                                 /*createIfNotExist=*/true);
506         if (result.has_value()) {
507             cache = std::move(result).value();
508         } else {
509             LOG(ERROR) << "getCacheHandles failure (" << result.error().code
510                        << "): " << result.error().message;
511         }
512     }
513 
514     // Get the token if it exists, otherwise get a null token.
515     static constexpr CacheToken kNullToken = {};
516     const CacheToken token = maybeToken.value_or(kNullToken);
517 
518     // Fall back to full compilation (possibly with the token) if
519     // prepareModelFromCache could not be used or failed.
520     const Model model = makeModel();
521     auto result =
522             kInterface->prepareModel(model, preference, priority, deadline, cache.modelCache,
523                                      cache.dataCache, token, metaData, extensionNameAndPrefix);
524     if (!result.ok()) {
525         LOG(ERROR) << "IDevice::prepareModel() error: " << result.error().message;
526         return {convertErrorStatusToResultCode(result.error().code), nullptr};
527     }
528     SharedPreparedModel preparedModel = std::move(result).value();
529     CHECK(preparedModel != nullptr)
530             << "IDevice::prepareModel() returned nullptr without error code";
531     return {ANEURALNETWORKS_NO_ERROR,
532             std::make_shared<DriverPreparedModel>(this, std::move(preparedModel))};
533 }
534 
535 std::pair<int, std::unique_ptr<RuntimeMemory>> DriverDevice::allocate(const MemoryDescriptor& desc,
536                                                                       OperandType) const {
537     const BufferDesc bufferDesc = {.dimensions = desc.dimensions};
538     std::vector<SharedPreparedModel> preparedModels(desc.preparedModels.size());
539     std::transform(desc.preparedModels.begin(), desc.preparedModels.end(), preparedModels.begin(),
540                    [](const auto* preparedModel) {
541                        const auto versionedPreparedModel = preparedModel->getInterface();
542                        CHECK(versionedPreparedModel != nullptr);
543                        return versionedPreparedModel;
544                    });
545     auto result =
546             kInterface->allocate(bufferDesc, preparedModels, desc.inputRoles, desc.outputRoles);
547     if (!result.ok()) {
548         LOG(ERROR) << "DriverDevice::allocate -- memory allocation on device " << getName()
549                    << " failed!";
550         return {convertErrorStatusToResultCode(result.error().code), nullptr};
551     }
552     return MemoryFromDevice::create(std::move(result).value());
553 }
554 
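// Builds a canonical Request from the runtime argument descriptions: each input/output becomes a
// Request::Argument and each RuntimeMemory contributes its memory pool as one entry in
// Request::pools.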
555 static Request createDriverRequest(const std::vector<ModelArgumentInfo>& inputs,
556                                    const std::vector<ModelArgumentInfo>& outputs,
557                                    const std::vector<const RuntimeMemory*>& memories) {
558     Request request;
559     request.inputs.reserve(inputs.size());
560     std::transform(inputs.begin(), inputs.end(), std::back_inserter(request.inputs),
561                    [](const auto& input) { return input.createRequestArgument(); });
562     request.outputs.reserve(outputs.size());
563     std::transform(outputs.begin(), outputs.end(), std::back_inserter(request.outputs),
564                    [](const auto& output) { return output.createRequestArgument(); });
565     request.pools.reserve(memories.size());
566     std::transform(memories.begin(), memories.end(), std::back_inserter(request.pools),
567                    [](const RuntimeMemory* memory) { return memory->getMemoryPool(); });
568     return request;
569 }
570 
571 // Perform computation on an actual device driver.
572 //
573 // Because HIDL cannot take raw pointers, two separate memory pools will be allocated for inputs and
574 // outputs specified by pointers. The input pointer data will be copied to the input pool prior to
575 // execution, and the output pointer data will be copied out from the output pool after the
576 // execution.
577 std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
578         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
579         const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
580         MeasureTiming measure, const OptionalTimePoint& deadline,
581         const OptionalDuration& loopTimeoutDuration,
582         const std::vector<TokenValuePair>& metaData) const {
583     NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");
584 
585     auto request = createDriverRequest(inputs, outputs, memories);
586 
587     NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::execute::execute");
588 
589     ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> result;
590 
591     // Compute using the burst if present; otherwise compute through the IPreparedModel.
592     const bool burstCompute = (burstController != nullptr);
593     if (burstCompute) {
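        // Let the burst object cache the memories used by this request, and keep the returned
        // cache holds alive on each RuntimeMemory so later executions can reuse the imports.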
594         for (const RuntimeMemory* memory : memories) {
595             const auto pool = memory->getMemoryPool();
596             if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
597                 auto cacheHold = burstController->cacheMemory(*maybeMemory);
598                 memory->hold(cacheHold);
599             }
600         }
601 
602         VLOG(EXECUTION) << "Before burstController->execute() " << SHOW_IF_DEBUG(request);
603         result = burstController->execute(request, measure, deadline, loopTimeoutDuration, metaData,
604                                           TypeManager::get()->getExtensionNameAndPrefix(metaData));
605     } else {
606         result = mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
607                                          TypeManager::get()->getExtensionNameAndPrefix(metaData));
608     }
609 
610     int n = ANEURALNETWORKS_OP_FAILED;
611     std::vector<OutputShape> outputShapes;
612     Timing timing;
613 
614     if (result.ok()) {
615         n = ANEURALNETWORKS_NO_ERROR;
616         std::tie(outputShapes, timing) = std::move(result).value();
617     } else {
618         auto [message, code, returnedOutputShapes] = std::move(result).error();
619         n = convertErrorStatusToResultCode(code);
620         VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
621         LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
622                    << "::execute(...) error: " << message;
623         if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
624             outputShapes = std::move(returnedOutputShapes);
625         }
626         return {n, std::move(outputShapes), timing};
627     }
628 
629     VLOG(EXECUTION) << "DriverPreparedModel::execute completed";
630     return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
631 }
632 
633 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverPreparedModel::executeFenced(
634         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
635         const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
636         MeasureTiming measure, const OptionalTimePoint& deadline,
637         const OptionalDuration& loopTimeoutDuration,
638         const OptionalDuration& timeoutDurationAfterFence,
639         const std::vector<TokenValuePair>& metaData) const {
640     NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::executeFenced");
641     CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));
642 
643     auto request = createDriverRequest(inputs, outputs, memories);
644 
645     NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::executeFenced");
646 
647     std::vector<SyncFence> waitForHandles;
648     waitForHandles.reserve(waitFor.size());
649     for (int fd : waitFor) {
650         int dupFd = dup(fd);
651         if (dupFd < 0) {
652             LOG(ERROR) << "Unable to dup the file descriptor";
653             return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
654         }
655         waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
656     }
657 
658     SyncFence syncFence = SyncFence::createAsSignaled();
659     ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
660     Timing timing = {};
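    // Fenced execution requires a driver at HAL version 1.3 or later; older drivers fall through
    // to the synchronous path below, which waits on the fences itself before executing.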
661     if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, mDevice->getFeatureLevel())) {
662         auto result = mPreparedModel->executeFenced(
663                 request, waitForHandles, measure, deadline, loopTimeoutDuration,
664                 timeoutDurationAfterFence, metaData,
665                 TypeManager::get()->getExtensionNameAndPrefix(metaData));
666         if (!result.ok()) {
667             LOG(ERROR) << "IPreparedModel::executeFenced() error: " << result.error().message;
668             VLOG(EXECUTION) << "**executeFenced failed**";
669             return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
670         }
671         std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
672     } else {
673         // Fall back to synchronous execution if executeFenced is not supported.
674         // First wait for all sync fences to be ready.
675         LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
676         for (const auto& fence : waitForHandles) {
677             if (!fence.hasFd() || fence.getFd() < 0) {
678                 return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
679             }
680             auto r = fence.syncWait({/* no timeout */});
681             if (r != SyncFence::FenceState::SIGNALED) {
682                 LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
683                 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
684             }
685         }
686         auto result =
687                 mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
688                                         TypeManager::get()->getExtensionNameAndPrefix(metaData));
689         if (!result.ok()) {
690             LOG(ERROR) << "IPreparedModel::execute() error: " << result.error().message;
691             return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
692         }
693         std::tie(std::ignore, timing) = result.value();
694     }
695 
696     int syncFenceFd = -1;
697     if (syncFence.hasFd()) {
698         syncFenceFd = dup(syncFence.getFd());
699         if (syncFenceFd < 0) {
700             LOG(ERROR) << "Failed to dup the file descriptor";
701             return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
702         }
703     }
704 
705     VLOG(EXECUTION) << "DriverPreparedModel::executeFenced completed";
706     return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
707 }
708 
709 std::pair<int, std::shared_ptr<RuntimeExecution>> DriverPreparedModel::createReusableExecution(
710         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
711         const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
712         const OptionalDuration& loopTimeoutDuration,
713         const std::vector<TokenValuePair>& metaData) const {
714     NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::createReusableExecution");
715 
716     auto request = createDriverRequest(inputs, outputs, memories);
717     auto result = mPreparedModel->createReusableExecution(
718             request, measure, loopTimeoutDuration, metaData,
719             TypeManager::get()->getExtensionNameAndPrefix(metaData));
720     if (!result.ok()) {
721         LOG(ERROR) << "IPreparedModel::createReusableExecution() error: " << result.error().message;
722         const int n = convertErrorStatusToResultCode(result.error().code);
723         return {n, nullptr};
724     }
725     auto execution = std::make_shared<DriverExecution>(
726             std::move(result).value(), std::move(request), memories, measure, loopTimeoutDuration,
727             mDevice->getFeatureLevel(), metaData);
728     return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
729 }
730 
731 std::tuple<int, std::vector<OutputShape>, Timing> DriverExecution::compute(
732         const SharedBurst& burstController, const OptionalTimePoint& deadline) const {
733     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::compute");
734 
735     // Compute using the burst if present; otherwise compute through the IPreparedModel.
736     SharedExecution execution;
737     const bool burstCompute = (burstController != nullptr);
738     if (burstCompute) {
739         // Create a reusable burst execution if this controller has not been seen before.
740         auto burstExecution = mCachedBurstExecutions.find(burstController.get());
741         if (burstExecution == mCachedBurstExecutions.end()) {
742             for (const RuntimeMemory* memory : kMemories) {
743                 const auto pool = memory->getMemoryPool();
744                 if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
745                     auto cacheHold = burstController->cacheMemory(*maybeMemory);
746                     memory->hold(cacheHold);
747                 }
748             }
749             auto createResult = burstController->createReusableExecution(
750                     kRequest, kMeasure, kLoopTimeoutDuration, kMetaData,
751                     TypeManager::get()->getExtensionNameAndPrefix(kMetaData));
752             if (!createResult.ok()) {
753                 LOG(ERROR) << "IBurst::createReusableExecution() error: "
754                            << createResult.error().message;
755                 const int n = convertErrorStatusToResultCode(createResult.error().code);
756                 return {n, {}, {}};
757             }
758             execution = std::move(createResult).value();
759             mCachedBurstExecutions.emplace(burstController.get(), execution);
760         } else {
761             execution = burstExecution->second;
762         }
763         VLOG(EXECUTION) << "Before mBurstExecution->compute() " << SHOW_IF_DEBUG(kRequest);
764     } else {
765         execution = kExecution;
766     }
767 
768     CHECK(execution != nullptr);
769     auto result = execution->compute(deadline);
770     if (!result.ok()) {
771         auto [message, code, returnedOutputShapes] = std::move(result).error();
772         int n = convertErrorStatusToResultCode(code);
773         VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
774         LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
775                    << "::execute(...) error: " << message;
776         if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
777             return {n, std::move(returnedOutputShapes), {}};
778         }
779         return {n, {}, {}};
780     }
781 
782     VLOG(EXECUTION) << "DriverExecution::compute completed";
783     auto [outputShapes, timing] = std::move(result).value();
784     return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
785 }
786 
787 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverExecution::computeFenced(
788         const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
789         const OptionalDuration& timeoutDurationAfterFence) const {
790     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::computeFenced");
791     CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));
792 
793     std::vector<SyncFence> waitForHandles;
794     waitForHandles.reserve(waitFor.size());
795     for (int fd : waitFor) {
796         int dupFd = dup(fd);
797         if (dupFd < 0) {
798             LOG(ERROR) << "Unable to dup the file descriptor";
799             return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
800         }
801         waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
802     }
803 
804     SyncFence syncFence = SyncFence::createAsSignaled();
805     ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
806     Timing timing = {};
807     if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, kDeviceFeatureLevel)) {
808         auto result =
809                 kExecution->computeFenced(waitForHandles, deadline, timeoutDurationAfterFence);
810         if (!result.ok()) {
811             LOG(ERROR) << "IExecution::computeFenced() error: " << result.error().message;
812             VLOG(EXECUTION) << "**computeFenced failed**";
813             return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
814         }
815         std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
816     } else {
817         // Fall back to synchronous execution if computeFenced is not supported.
818         // First wait for all sync fences to be ready.
819         LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
820         for (const auto& fence : waitForHandles) {
821             if (!fence.hasFd() || fence.getFd() < 0) {
822                 return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
823             }
824             auto r = fence.syncWait({/* no timeout */});
825             if (r != SyncFence::FenceState::SIGNALED) {
826                 LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
827                 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
828             }
829         }
830         auto result = kExecution->compute(deadline);
831         if (!result.ok()) {
832             LOG(ERROR) << "IExecution::compute() error: " << result.error().message;
833             return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
834         }
835         std::tie(std::ignore, timing) = result.value();
836     }
837 
838     int syncFenceFd = -1;
839     if (syncFence.hasFd()) {
840         syncFenceFd = dup(syncFence.getFd());
841         if (syncFenceFd < 0) {
842             LOG(ERROR) << "Failed to dup the file descriptor";
843             return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
844         }
845     }
846 
847     VLOG(EXECUTION) << "DriverExecution::computeFenced completed";
848     return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
849 }
850 
851 static Capabilities createCpuCapabilities() {
852     constexpr Capabilities::PerformanceInfo kPerf = {.execTime = 1.0f, .powerUsage = 1.0f};
853     return makeCapabilities(kPerf, kPerf, kPerf);
854 }
855 
856 // A special abstracted device for the CPU. Only one instance of this class will exist.
857 // Use get() to retrieve it.
858 class CpuDevice : public Device {
859    public:
860     // Returns the singleton CPU fallback device.
861     static std::shared_ptr<CpuDevice> get() {
862         static std::shared_ptr<CpuDevice> instance(new CpuDevice);
863         return instance;
864     }
865 
866     const std::string& getName() const override { return kName; }
867     const std::string& getVersionString() const override { return kVersionString; }
868     Version getFeatureLevel() const override { return kVersion; }
869     int32_t getType() const override { return ANEURALNETWORKS_DEVICE_CPU; }
870     const std::vector<Extension>& getSupportedExtensions() const override {
871         return kSupportedExtensions;
872     }
873     std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
874     const Capabilities& getCapabilities() const override { return kCapabilities; }
875     Capabilities::PerformanceInfo getPerformance(OperandType) const override {
876         return kPerformance;
877     }
878     Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
879         return kPerformance;
880     }
881     Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
882         return kPerformance;
883     }
884     Capabilities::PerformanceInfo getIfPerformance() const override { return kPerformance; }
885     Capabilities::PerformanceInfo getWhilePerformance() const override { return kPerformance; }
886     std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
887         return {/*numModelCache=*/0, /*numDataCache=*/0};
888     }
889     bool isCachingSupported() const override { return false; }
890     int wait() const override { return ANEURALNETWORKS_NO_ERROR; }
891 
892     std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
893             const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
894             const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
895             const std::optional<CacheToken>& maybeToken,
896             const std::vector<TokenValuePair>& metaData,
897             const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;
898 
899     std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
900                                                             OperandType type) const override;
901 
902    private:
903     CpuDevice() = default;
904     const Version kVersion = getRuntimeFeatureLevelVersion();
905     const std::string kName = "nnapi-reference";
906 #ifndef NN_COMPATIBILITY_LIBRARY_BUILD
907     const std::string kVersionString = build::GetBuildNumber();
908 #else
909     const std::string kVersionString = "UNKNOWN";
910 #endif  // NN_COMPATIBILITY_LIBRARY_BUILD
911     // Since the performance is a ratio compared to the CPU performance,
912     // by definition the performance of the CPU is 1.0.
913     const Capabilities::PerformanceInfo kPerformance = {.execTime = 1.0f, .powerUsage = 1.0f};
914     const Capabilities kCapabilities = createCpuCapabilities();
915     const std::vector<Extension> kSupportedExtensions{/* No extensions. */};
916 };
917 
918 // A special abstracted RuntimePreparedModel for the CPU, constructed by CpuDevice.
919 class CpuPreparedModel : public RuntimePreparedModel {
920    public:
921     // Factory method for CpuPreparedModel. Returns ANEURALNETWORKS_NO_ERROR and
922     // a prepared model object if successfully created. Returns an error code
923     // and nullptr otherwise.
924     static std::pair<int, std::shared_ptr<RuntimePreparedModel>> create(Model model);
925 
926     const Device* getDevice() const override { return CpuDevice::get().get(); }
927     SharedPreparedModel getInterface() const override { return nullptr; }
928 
929     std::tuple<int, std::vector<OutputShape>, Timing> execute(
930             const std::vector<ModelArgumentInfo>& inputs,
931             const std::vector<ModelArgumentInfo>& outputs,
932             const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
933             MeasureTiming measure, const OptionalTimePoint& deadline,
934             const OptionalDuration& loopTimeoutDuration,
935             const std::vector<TokenValuePair>& metaData) const override;
936 
937     GeneralResult<SharedBurst> configureExecutionBurst() const override { return nullptr; }
938 
939     std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
940             const std::vector<ModelArgumentInfo>& inputs,
941             const std::vector<ModelArgumentInfo>& outputs,
942             const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
943             MeasureTiming measure, const OptionalTimePoint& deadline,
944             const OptionalDuration& loopTimeoutDuration,
945             const OptionalDuration& timeoutDurationAfterFence,
946             const std::vector<TokenValuePair>& metaData) const override;
947 
948     std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
949             const std::vector<ModelArgumentInfo>& inputs,
950             const std::vector<ModelArgumentInfo>& outputs,
951             const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
952             const OptionalDuration& loopTimeoutDuration,
953             const std::vector<TokenValuePair>& metaData) const override;
954 
955     MemoryPreference getMemoryPreference() const override {
956         return {kPreferredAlignment, kPreferredPadding};
957     }
958 
959     // Prefer to use CpuPreparedModel::create.
960     CpuPreparedModel(Model model, std::vector<RunTimePoolInfo> poolInfos)
961         : mModel(std::move(model)), mModelPoolInfos(std::move(poolInfos)) {}
962 
963     const Model& getModel() const { return mModel; }
964     const std::vector<RunTimePoolInfo>& getModelPoolInfos() const { return mModelPoolInfos; }
965 
966    private:
967     // TFLite kernels prefer 64 bytes for padding and alignment.
968     static constexpr uint32_t kPreferredAlignment = 64;
969     static constexpr uint32_t kPreferredPadding = 64;
970 
971     const Model mModel;
972     const std::vector<RunTimePoolInfo> mModelPoolInfos;
973 };
974 
975 class CpuExecution : public RuntimeExecution {
976    public:
977     CpuExecution(const CpuPreparedModel& preparedModel, Request request,
978                  std::vector<RunTimePoolInfo> requestPoolInfos,
979                  OptionalDuration loopTimeoutDuration)
980         : kPreparedModel(preparedModel),
981           kRequest(std::move(request)),
982           kRequestPoolInfos(std::move(requestPoolInfos)),
983           kLoopTimeoutDuration(std::move(loopTimeoutDuration)) {}
984 
985     std::tuple<int, std::vector<OutputShape>, Timing> compute(
986             const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;
987 
988     std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
989             const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
990             const OptionalDuration& timeoutDurationAfterFence) const override;
991 
992    private:
993     const CpuPreparedModel& kPreparedModel;
994     Request kRequest;
995     std::vector<RunTimePoolInfo> kRequestPoolInfos;
996     const OptionalDuration kLoopTimeoutDuration;
997 };
998 
999 std::vector<bool> CpuDevice::getSupportedOperations(const MetaModel& metaModel) const {
1000     const Model& model = metaModel.getModel();
1001     const size_t count = model.main.operations.size();
1002     std::vector<bool> result(count, false);
1003     for (size_t i = 0; i < count; i++) {
1004         // TODO(b/119870033): Decide whether and how post-P operations would be supported on CPU.
1005         //                    We may want to use the slicer for CpuDevice just as we do for
1006         //                    DriverDevice.
1007         OperationType operationType = model.main.operations[i].type;
1008         result[i] = !isExtension(operationType) && operationType != OperationType::OEM_OPERATION;
1009     }
1010     return result;
1011 }
1012 
1013 template <typename Type>
1014 static Result<void> validateAndCheckCompliance(const Type& object) {
1015     const auto version = NN_TRY(validate(object));
1016     if (!isCompliantVersion(version, DeviceManager::get()->getRuntimeVersion())) {
1017         return NN_ERROR() << "Object is newer than what is allowed. Version needed: " << version
1018                           << ", current runtime version supported: "
1019                           << DeviceManager::get()->getRuntimeVersion();
1020     }
1021     return {};
1022 }
1023 
1024 std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuDevice::prepareModel(
1025         const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
1026         const OptionalTimePoint& deadline, const CacheInfo& /*cacheInfo*/,
1027         const std::optional<CacheToken>& maybeToken,
1028         const std::vector<TokenValuePair>& /*metaData*/,
1029         const std::vector<ExtensionNameAndPrefix>& /*extensionNameAndPrefix*/) const {
1030     CHECK(!maybeToken.has_value())
1031             << "Should never call prepareModel with cache information on CpuDevice";
1032 
1033     const Model model = makeModel();
1034     if (auto result = validateAndCheckCompliance(model); !result.ok()) {
1035         LOG(ERROR) << "Invalid Model: " << result.error();
1036         return {ANEURALNETWORKS_OP_FAILED, nullptr};
1037     }
1038     if (auto result = validateAndCheckCompliance(preference); !result.ok()) {
1039         LOG(ERROR) << "Invalid ExecutionPreference: " << result.error();
1040         return {ANEURALNETWORKS_OP_FAILED, nullptr};
1041     }
1042     if (auto result = validateAndCheckCompliance(priority); !result.ok()) {
1043         LOG(ERROR) << "Invalid Priority: " << result.error();
1044         return {ANEURALNETWORKS_OP_FAILED, nullptr};
1045     }
1046     if (hasDeadlinePassed(deadline)) {
1047         return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, nullptr};
1048     }
1049 
1050     return CpuPreparedModel::create(model);
1051 }
1052 
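// Allocates memory for the CPU path as ashmem. Fails if the descriptor's dimensions do not
// fully determine the buffer size.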
std::pair<int, std::unique_ptr<RuntimeMemory>> CpuDevice::allocate(const MemoryDescriptor& desc,
                                                                   OperandType type) const {
    uint32_t size = TypeManager::get()->getSizeOfData(type, desc.dimensions);
    if (size == 0) {
        LOG(ERROR) << "CpuDevice::allocate -- does not support unknown dimensions.";
        return {ANEURALNETWORKS_OP_FAILED, nullptr};
    }
    return MemoryAshmem::create(size);
}

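// Builds a CpuPreparedModel by mapping the model's memory pools into RunTimePoolInfos.
// Returns ANEURALNETWORKS_UNMAPPABLE if any pool cannot be mapped.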
std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuPreparedModel::create(Model model) {
    std::vector<RunTimePoolInfo> poolInfos;
    if (!setRunTimePoolInfosFromCanonicalMemories(&poolInfos, model.pools)) {
        return {ANEURALNETWORKS_UNMAPPABLE, nullptr};
    }

    std::shared_ptr<RuntimePreparedModel> preparedModel =
            std::make_shared<CpuPreparedModel>(std::move(model), std::move(poolInfos));
    return {ANEURALNETWORKS_NO_ERROR, std::move(preparedModel)};
}

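// Runs the request on CpuExecutor, applying the optional loop timeout and deadline. Returns the
// executor's status code and the output shapes; timing is left empty because the CPU path does
// not measure execution duration.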
static std::tuple<int, std::vector<OutputShape>, Timing> computeOnCpu(
        const Model& model, const Request& request,
        const std::vector<RunTimePoolInfo>& modelPoolInfos,
        const std::vector<RunTimePoolInfo>& requestPoolInfos, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration) {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "computeOnCpu");
    CpuExecutor executor;
    if (loopTimeoutDuration.has_value()) {
        executor.setLoopTimeout(loopTimeoutDuration->count());
    }
    if (deadline.has_value()) {
        executor.setDeadline(*deadline);
    }
    int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
    const auto& outputShapes = executor.getOutputShapes();
    return {err, outputShapes, {}};
}

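// Fenced execution on the CPU path: waits for each valid sync fence in 'waitFor' to signal,
// tightens the deadline if 'duration' would expire sooner, then falls back to a synchronous
// execute(). No sync fence is produced, so the returned fence fd is -1 and the callback is null.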
std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuPreparedModel::executeFenced(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
        MeasureTiming measure, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration, const OptionalDuration& duration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    VLOG(EXECUTION)
            << "CpuPreparedModel::executeFenced wait for sync fences to signal before execution";
    for (int syncFd : waitFor) {
        if (syncFd > 0) {
            auto r = syncWait(syncFd, -1);
            if (r != FenceState::SIGNALED) {
                LOG(ERROR) << "sync wait failed, fd: " << syncFd;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
    }

    // Update deadline if the timeout duration is closer than the deadline.
    auto closestDeadline = deadline;
    if (duration.has_value()) {
        const auto timeoutDurationDeadline = makeDeadline(*duration);
        if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
            closestDeadline = timeoutDurationDeadline;
        }
    }

    const auto [result, outputShapes, timing] = execute(inputs, outputs, memories, nullptr, measure,
                                                        closestDeadline, loopTimeoutDuration, {});
    return {result, -1, nullptr, timing};
}

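// Converts runtime argument info and memories into a canonical Request plus the RunTimePoolInfos
// backing it. Arguments supplied as raw pointers each get an extra pool created from the existing
// buffer, so no data is copied.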
static std::tuple<int, Request, std::vector<RunTimePoolInfo>> createCpuRequest(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories) {
    std::vector<RunTimePoolInfo> requestPoolInfos;
    requestPoolInfos.reserve(memories.size());
    for (const RuntimeMemory* mem : memories) {
        if (std::optional<RunTimePoolInfo> poolInfo = mem->getRunTimePoolInfo()) {
            requestPoolInfos.emplace_back(*poolInfo);
        } else {
            return {ANEURALNETWORKS_UNMAPPABLE, {}, {}};
        }
    }
    // Create as many pools as there are pointer-based inputs / outputs.
    auto fixPointerArguments =
            [&requestPoolInfos](const std::vector<ModelArgumentInfo>& argumentInfos) {
                std::vector<DataLocation> ptrArgsLocations;
                for (const ModelArgumentInfo& argumentInfo : argumentInfos) {
                    if (argumentInfo.state() == ModelArgumentInfo::POINTER) {
                        ptrArgsLocations.push_back(
                                {.poolIndex = static_cast<uint32_t>(requestPoolInfos.size()),
                                 .offset = 0,
                                 .length = argumentInfo.length(),
                                 .padding = argumentInfo.padding()});
                        requestPoolInfos.emplace_back(RunTimePoolInfo::createFromExistingBuffer(
                                static_cast<uint8_t*>(argumentInfo.buffer())));
                    }
                }
                return ptrArgsLocations;
            };
    const std::vector<DataLocation> inputPtrArgsLocations = fixPointerArguments(inputs);
    const std::vector<DataLocation> outputPtrArgsLocations = fixPointerArguments(outputs);

    Request request;
    request.inputs = createRequestArguments(inputs, inputPtrArgsLocations);
    request.outputs = createRequestArguments(outputs, outputPtrArgsLocations);
    return {ANEURALNETWORKS_NO_ERROR, std::move(request), std::move(requestPoolInfos)};
}

// Perform computation on the NNAPI CPU reference implementation.
//
// Contrary to DriverPreparedModel::execute, the NNAPI CPU reference executor lives in the
// same process as the NNAPI runtime and can take raw pointers. We will create as many pools as
// there are inputs/outputs in this method to avoid data copying.
//
// Will choose between sync/async execution according to DeviceManager::mSyncExecCpu.
std::tuple<int, std::vector<OutputShape>, Timing> CpuPreparedModel::execute(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const SharedBurst& /*burstController*/,
        MeasureTiming /*measure*/, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    if (hasDeadlinePassed(deadline)) {
        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
    }

    int nCreateRequest;
    Request request;
    std::vector<RunTimePoolInfo> requestPoolInfos;
    std::tie(nCreateRequest, request, requestPoolInfos) =
            createCpuRequest(inputs, outputs, memories);
    if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
        return {nCreateRequest, {}, {}};
    }

    if (!DeviceManager::get()->syncExecCpu()) {
        // TODO: use a thread pool
        // TODO(mikie): this could have NNTRACE so we could measure the overhead
        //              of spinning up a new thread.
        std::tuple<int, std::vector<OutputShape>, Timing> result = {};
        std::thread([this, &request, &requestPoolInfos, &deadline, &loopTimeoutDuration, &result] {
            result = computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
                                  loopTimeoutDuration);
        }).join();
        return result;
    }

    return computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
                        loopTimeoutDuration);
}

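// Creates a reusable execution object that caches the Request and RunTimePoolInfos so that
// repeated computations over the same arguments skip the request-construction step.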
std::pair<int, std::shared_ptr<RuntimeExecution>> CpuPreparedModel::createReusableExecution(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, MeasureTiming /*measure*/,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    auto [nCreateRequest, request, requestPoolInfos] = createCpuRequest(inputs, outputs, memories);
    if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
        return {nCreateRequest, nullptr};
    }
    auto execution = std::make_shared<CpuExecution>(
            *this, std::move(request), std::move(requestPoolInfos), loopTimeoutDuration);
    return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
}

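// Reusable-execution counterpart of CpuPreparedModel::execute: runs the cached request on the
// CPU reference executor, choosing sync or async execution according to
// DeviceManager::syncExecCpu().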
std::tuple<int, std::vector<OutputShape>, Timing> CpuExecution::compute(
        const SharedBurst& /*burstController*/, const OptionalTimePoint& deadline) const {
    if (hasDeadlinePassed(deadline)) {
        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
    }

    if (!DeviceManager::get()->syncExecCpu()) {
        // TODO: use a thread pool
        // TODO(mikie): this could have NNTRACE so we could measure the overhead
        //              of spinning up a new thread.
        std::tuple<int, std::vector<OutputShape>, Timing> result = {};
        std::thread([this, &deadline, &result] {
            result = computeOnCpu(kPreparedModel.getModel(), kRequest,
                                  kPreparedModel.getModelPoolInfos(), kRequestPoolInfos, deadline,
                                  kLoopTimeoutDuration);
        }).join();
        return result;
    }

    return computeOnCpu(kPreparedModel.getModel(), kRequest, kPreparedModel.getModelPoolInfos(),
                        kRequestPoolInfos, deadline, kLoopTimeoutDuration);
}

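// Reusable-execution counterpart of CpuPreparedModel::executeFenced: waits for the given sync
// fences, tightens the deadline from 'duration' if needed, then runs compute() synchronously.
// No sync fence is produced, so -1 and a null callback are returned.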
std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuExecution::computeFenced(
        const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
        const OptionalDuration& duration) const {
    VLOG(EXECUTION)
            << "CpuExecution::computeFenced wait for sync fences to signal before execution";
    for (int syncFd : waitFor) {
        if (syncFd > 0) {
            auto r = syncWait(syncFd, -1);
            if (r != FenceState::SIGNALED) {
                LOG(ERROR) << "sync wait failed, fd: " << syncFd;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
    }

    // Update deadline if the timeout duration is closer than the deadline.
    auto closestDeadline = deadline;
    if (duration.has_value()) {
        const auto timeoutDurationDeadline = makeDeadline(*duration);
        if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
            closestDeadline = timeoutDurationDeadline;
        }
    }

    const auto [result, outputShapes, timing] = compute(nullptr, closestDeadline);
    return {result, -1, nullptr, timing};
}

int64_t DeviceManager::getRuntimeFeatureLevel() const {
    return versionToFeatureLevel(mRuntimeVersion.level);
}

DeviceManager* DeviceManager::get() {
    static DeviceManager manager;
    return &manager;
}

std::shared_ptr<Device> DeviceManager::getCpuDevice() {
    return CpuDevice::get();
}

std::shared_ptr<Device> DeviceManager::forTest_makeDriverDevice(const SharedDevice& device) {
    VLOG(MANAGER) << "forTest_makeDriverDevice(" << device->getName() << ")";
    const auto driverDevice = DriverDevice::create(device);
    CHECK(driverDevice != nullptr);
    return driverDevice;
}

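// Discovers the available driver devices and wraps each one in a DriverDevice. In the regular
// Android build, the devices are obtained from the NNAPI service discovery up to
// 'maxFeatureLevelAllowed'; off-device builds return no drivers, and the compatibility-library
// build uses the statically provided getDevices() instead.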
#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
        [[maybe_unused]] Version::Level maxFeatureLevelAllowed) {
#ifdef __ANDROID__
    auto devices = hardware::neuralnetworks::service::getDevices(maxFeatureLevelAllowed);

    std::vector<std::shared_ptr<DriverDevice>> driverDevices;
    driverDevices.reserve(devices.size());
    for (auto& device : devices) {
        driverDevices.push_back(DriverDevice::create(std::move(device)));
    }
    return driverDevices;
#else   // __ANDROID__
    return {};
#endif  // __ANDROID__
}
#else
std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
        Version::Level /*maxFeatureLevelAllowed*/) {
    auto devices = getDevices();
    std::vector<std::shared_ptr<DriverDevice>> driverDevices;
    driverDevices.reserve(devices.size());
    for (auto& device : devices) {
        driverDevices.push_back(DriverDevice::create(std::move(device)));
    }
    return driverDevices;
}
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD

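// Populates mDevices with all enabled driver devices and, for non-compatibility-library builds,
// the CPU fallback device. In debuggable builds, the debug.nn.enabled-devices property can
// restrict which drivers are registered.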
void DeviceManager::findAvailableDevices() {
    VLOG(MANAGER) << "findAvailableDevices";

#ifdef NN_DEBUGGABLE
    // debug.nn.enabled-devices defines a regex pattern. For all available driver devices, only
    // the ones with names matching the pattern are enabled. Driver devices with unmatched names
    // are ignored. If this property is not set, all available driver devices are enabled by
    // default. This filter only applies to driver devices; nnapi-reference is always enabled.
    std::string patternStr = base::GetProperty("debug.nn.enabled-devices", ".*");
    LOG(INFO) << "Enabled devices: " << patternStr;
    const std::regex pattern(patternStr);
#endif  // NN_DEBUGGABLE

    // register driver devices
    auto driverDevices = getDriverDevices(mRuntimeVersion.level);
    for (auto& driverDevice : driverDevices) {
#ifdef NN_DEBUGGABLE
        if (!std::regex_match(driverDevice->getName(), pattern)) {
            LOG(INFO) << "Ignored interface " << driverDevice->getName()
                      << " (version = " << driverDevice->getVersionString() << ")";
            continue;
        }
#endif  // NN_DEBUGGABLE
        LOG(INFO) << "Found interface " << driverDevice->getName()
                  << " (version = " << driverDevice->getVersionString() << ")";
        mDevices.push_back(std::move(driverDevice));
    }

#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
    // register CPU fallback device
    mDevices.push_back(CpuDevice::get());
    mDevicesCpuOnly.push_back(CpuDevice::get());
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD
}

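// Wraps the given canonical device in a DriverDevice and adds it to the device list; devices
// that fail to initialize are ignored.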
void DeviceManager::registerDevice(const SharedDevice& device) {
    if (auto driverDevice = DriverDevice::create(device)) {
        mDevices.push_back(std::move(driverDevice));
    }
}

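// Determines the runtime feature level and telemetry setting, then discovers the available
// devices. In debuggable builds, several debug.nn.* system properties can override partitioning
// and sync/async execution behavior.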
DeviceManager::DeviceManager() {
    VLOG(MANAGER) << "DeviceManager::DeviceManager";
    mRuntimeVersion = getRuntimeFeatureLevelVersion();
    mIsPlatformTelemetryEnabled = getWhetherPlatformTelemetryIsEnabled();
    findAvailableDevices();
#ifdef NN_DEBUGGABLE
    mStrictSlicing = (getProp("debug.nn.strict-slicing") != 0);
    mPartitioning = getProp("debug.nn.partition", kPartitioningDefault);
    mDebugNNCpuOnly = (getProp("debug.nn.cpuonly") != 0);
    mSyncExecCpu = (getProp("debug.nn.syncexec-cpu", 1) != 0);
    mSyncExecRuntime = (getProp("debug.nn.syncexec-runtime") != 0);
#endif  // NN_DEBUGGABLE
}

}  // namespace nn
}  // namespace android