//
// Copyright © 2020 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "ArmnnDriver.hpp"
#include "ArmnnDriverImpl.hpp"
#include "RequestThread_1_3.hpp"
#include "ModelToINetworkConverter.hpp"

#include <NeuralNetworks.h>
#include <armnn/ArmNN.hpp>

#include <string>
#include <vector>

namespace armnn_driver
{
/// Signature of the callback invoked when an asynchronous execution completes.
/// Receives the HAL 1.3 error status, the (possibly updated) output shapes,
/// the measured timing information, and the name of the calling function
/// (used for logging/diagnostics).
using CallbackAsync_1_3 = std::function<
    void(V1_3::ErrorStatus errorStatus,
         std::vector<::android::hardware::neuralnetworks::V1_2::OutputShape> outputShapes,
         const ::android::hardware::neuralnetworks::V1_2::Timing& timing,
         std::string callingFunction)>;

/// Per-execution timing context. The four TimePoints bracket the driver-side
/// and device-side portions of an execution; they are only meaningful when
/// measureTimings is MeasureTiming::YES (defaults to NO).
struct ExecutionContext_1_3
{
    ::android::hardware::neuralnetworks::V1_2::MeasureTiming measureTimings =
        ::android::hardware::neuralnetworks::V1_2::MeasureTiming::NO;
    TimePoint driverStart;
    TimePoint driverEnd;
    TimePoint deviceStart;
    TimePoint deviceEnd;
};

/// Callback + timing context bundle passed through the execution path.
using CallbackContext_1_3 = CallbackContext<CallbackAsync_1_3, ExecutionContext_1_3>;

/// Signature of the completion callback for executeFenced(): reports the error
/// status, a sync-fence handle the caller can wait on, and a callback object
/// for querying fenced-execution results.
using executeFenced_cb = std::function<void(::android::hardware::neuralnetworks::V1_3::ErrorStatus status,
                                            const ::android::hardware::hidl_handle& syncFence,
                                            const ::android::sp<::android::hardware::neuralnetworks::V1_3::IFencedExecutionCallback>& callback)>;

/// Arm NN implementation of the NNAPI HAL 1.3 IPreparedModel interface.
/// Wraps a network that has already been loaded into the Arm NN runtime
/// (identified by a NetworkId) and services the various HAL execution
/// entry points (synchronous, asynchronous, burst and fenced) against it.
template <typename HalVersion>
class ArmnnPreparedModel_1_3 : public V1_3::IPreparedModel
{
public:
    using HalModel = typename V1_3::Model;

    /// \param networkId  Identifier of the network previously loaded into \p runtime.
    /// \param runtime    Arm NN runtime used to execute the network (non-owning pointer;
    ///                   the runtime must outlive this object).
    /// \param model      The HAL model this prepared model was built from.
    /// \param requestInputsAndOutputsDumpDir  Directory for dumping request tensors
    ///                   (empty string presumably disables dumping — see DumpTensorsIfRequired).
    /// \param gpuProfilingEnabled  Whether GPU profiling was requested.
    /// \param priority   Execution priority of this model (defaults to MEDIUM).
    ArmnnPreparedModel_1_3(armnn::NetworkId networkId,
                           armnn::IRuntime* runtime,
                           const HalModel& model,
                           const std::string& requestInputsAndOutputsDumpDir,
                           const bool gpuProfilingEnabled,
                           V1_3::Priority priority = V1_3::Priority::MEDIUM);

    virtual ~ArmnnPreparedModel_1_3();

    /// HAL 1.0 asynchronous execution entry point.
    Return<V1_0::ErrorStatus> execute(const V1_0::Request& request,
                                      const sp<V1_0::IExecutionCallback>& callback) override;

    /// HAL 1.2 asynchronous execution entry point, with optional timing measurement.
    Return<V1_0::ErrorStatus> execute_1_2(const V1_0::Request& request, V1_2::MeasureTiming measure,
                                          const sp<V1_2::IExecutionCallback>& callback) override;

    /// HAL 1.3 asynchronous execution entry point. The OptionalTimePoint (deadline)
    /// and OptionalTimeoutDuration parameters are unnamed here; whether they are
    /// honoured is determined by the implementation (not visible in this header).
    Return<V1_3::ErrorStatus> execute_1_3(const V1_3::Request& request,
                                          V1_2::MeasureTiming measure,
                                          const V1_3::OptionalTimePoint&,
                                          const V1_3::OptionalTimeoutDuration&,
                                          const sp<V1_3::IExecutionCallback>& callback) override;

    /// HAL 1.2-style synchronous execution: results are delivered through \p cb
    /// before this call returns.
    Return<void> executeSynchronously(const V1_0::Request &request,
                                      V1_2::MeasureTiming measure,
                                      V1_3::IPreparedModel::executeSynchronously_cb cb) override;

    /// HAL 1.3 synchronous execution with deadline and loop-timeout parameters.
    Return<void> executeSynchronously_1_3(const V1_3::Request &request,
                                          V1_2::MeasureTiming measure,
                                          const V1_3::OptionalTimePoint& deadline,
                                          const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
                                          V1_3::IPreparedModel::executeSynchronously_1_3_cb cb) override;

    /// HAL 1.3 fenced execution: waits on \p fenceWaitFor before running and
    /// signals completion through the sync fence delivered via \p callback.
    Return<void> executeFenced(const V1_3::Request& request,
                               const android::hardware::hidl_vec<android::hardware::hidl_handle>& fenceWaitFor,
                               V1_2::MeasureTiming measure,
                               const V1_3::OptionalTimePoint& deadline,
                               const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
                               const V1_3::OptionalTimeoutDuration& duration,
                               executeFenced_cb callback) override;

    /// Sets up a burst-execution controller over the given fast message queues.
    Return<void> configureExecutionBurst(
        const sp<V1_2::IBurstCallback>& callback,
        const android::hardware::MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
        const android::hardware::MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
        configureExecutionBurst_cb cb) override;

    /// Shared synchronous-execution implementation, templated on the callback
    /// context type so the 1.2 and 1.3 synchronous paths can reuse it.
    template<typename CallbackContext>
    Return<void> ExecuteSynchronously(const V1_3::Request& request, CallbackContext cbCtx);

    /// execute the graph prepared from the request
    template<typename CallbackContext>
    Return <V1_3::ErrorStatus> ExecuteGraph(
        std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
        armnn::InputTensors& inputTensors,
        armnn::OutputTensors& outputTensors,
        CallbackContext callback);

    /// Executes this model with dummy inputs (e.g. all zeroes).
    /// \return false on failure, otherwise true
    bool ExecuteWithDummyInputs();

    /// Returns the priority this model was prepared with.
    V1_3::Priority GetModelPriority();

private:
    /// Common asynchronous-execution implementation shared by the public
    /// execute/execute_1_2/execute_1_3 entry points.
    Return <V1_3::ErrorStatus> Execute(const V1_3::Request& request,
                                       V1_2::MeasureTiming measureTiming,
                                       CallbackAsync_1_3 callback);

    /// Binds the request's input memory pools to Arm NN input tensors.
    Return<V1_3::ErrorStatus> PrepareMemoryForInputs(
        armnn::InputTensors& inputs,
        const V1_3::Request& request,
        const std::vector<android::nn::RunTimePoolInfo>& memPools);

    /// Binds the request's output memory pools to Arm NN output tensors and
    /// records the resulting output shapes.
    Return<V1_3::ErrorStatus> PrepareMemoryForOutputs(
        armnn::OutputTensors& outputs,
        std::vector<V1_2::OutputShape> &outputShapes,
        const V1_3::Request& request,
        const std::vector<android::nn::RunTimePoolInfo>& memPools);

    /// Prepares both inputs and outputs for a request. Returns the error status,
    /// the output shapes, timing information and a diagnostic message as a tuple.
    std::tuple<V1_3::ErrorStatus, android::hardware::hidl_vec<V1_2::OutputShape>, V1_2::Timing, std::string> PrepareMemoryForIO(
        armnn::InputTensors& inputs,
        armnn::OutputTensors& outputs,
        std::vector<android::nn::RunTimePoolInfo>& memPools,
        const V1_3::Request& request);

    /// Dumps the given tensor bindings for debugging; \p tensorNamePrefix
    /// distinguishes inputs from outputs in the dump.
    template <typename TensorBindingCollection>
    void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings);

    // Identifier of the network inside m_Runtime.
    armnn::NetworkId m_NetworkId;
    // Non-owning pointer to the Arm NN runtime; assumed to outlive this object.
    armnn::IRuntime* m_Runtime;
    // Copy of the HAL model this prepared model was created from.
    V1_3::Model m_Model;
    // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
    // It is specific to this class, so it is declared as static here
    static RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread;
    // Count of requests serviced by this prepared model (e.g. for dump-file naming/logging).
    uint32_t m_RequestCount;
    // NOTE(review): reference member bound to a constructor argument — dangles if the
    // caller passes a temporary or a string that does not outlive this object; the
    // owning driver presumably guarantees the lifetime. Confirm, or store by value.
    const std::string& m_RequestInputsAndOutputsDumpDir;
    const bool m_GpuProfilingEnabled;
    // Priority the model was prepared with; returned by GetModelPriority().
    V1_3::Priority m_ModelPriority;
};

} // namespace armnn_driver