1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H 18 #define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H 19 20 #include "Callbacks.h" 21 #include "HalInterfaces.h" 22 #include "Memory.h" 23 #include "ModelBuilder.h" 24 #include "NeuralNetworks.h" 25 26 #include <unordered_map> 27 #include <vector> 28 29 using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback; 30 using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback; 31 32 namespace android { 33 namespace nn { 34 35 class CompilationBuilder; 36 class ExecutionPlan; 37 class Memory; 38 class ModelBuilder; 39 class StepExecutor; 40 41 // TODO move length out of DataLocation 42 struct ModelArgumentInfo { 43 // Whether the argument was specified as being in a Memory, as a pointer, 44 // has no value, or has not been specified. 45 // If POINTER then: 46 // locationAndLength.length is valid. 47 // dimensions is valid. 48 // buffer is valid 49 // If MEMORY then: 50 // locationAndLength.location.{poolIndex, offset, length} is valid. 51 // dimensions is valid. 52 enum { POINTER, MEMORY, HAS_NO_VALUE, UNSPECIFIED } state = UNSPECIFIED; 53 DataLocation locationAndLength; 54 std::vector<uint32_t> dimensions; 55 void* buffer; 56 57 int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer, 58 uint32_t length); 59 int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type, 60 uint32_t poolIndex, uint32_t offset, uint32_t length); 61 int setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex, uint32_t offset); 62 int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType); 63 }; 64 65 class ExecutionBuilder { 66 friend class StepExecutor; 67 public: 68 ExecutionBuilder(const CompilationBuilder* compilation); 69 70 int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer, 71 size_t length); 72 int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 73 const Memory* memory, size_t offset, size_t length); 74 int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer, 75 size_t length); 76 int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 77 const Memory* memory, size_t offset, size_t length); 78 int startCompute(sp<ExecutionCallback>* synchronizationCallback); 79 getModel()80 const ModelBuilder* getModel() const { return mModel; } 81 82 private: 83 const ModelBuilder* mModel; 84 const ExecutionPlan* mPlan; 85 86 // The information we'll send to the driver about the inputs and outputs. 87 // Note that we build this in two steps: 88 // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element. 89 // If set from a pointer, don't set the location in the RequestArgument but store it 90 // instead in mInputBuffers or mOutputBuffers. 91 // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for 92 // the m*Buffers entries. Copy the input values into the shared memory. 93 // We do this to avoid creating a lot of shared memory objects if we have a lot of 94 // parameters specified via pointers. We also avoid copying in the case where 95 // some of the nodes will interpreted on the CPU anyway. 96 std::vector<ModelArgumentInfo> mInputs; 97 std::vector<ModelArgumentInfo> mOutputs; 98 MemoryTracker mMemories; 99 }; 100 101 // class StepExecutor is used to execute a single "step" in a 102 // potentially multiple step execution process. The graph associated 103 // with that step is executed in its entirety on a single device (or 104 // on the CPU). 105 class StepExecutor { 106 public: 107 // executionBuilder 108 // Describes the full (possibly multiple-"step") execution. 109 // model 110 // The model to be executed by the executor. Possibly a 111 // submodel of the model from executionBuilder. 112 // driver, preparedModel 113 // The device on which to execute the "step", and the prepared 114 // model to execute on that device. (Both are nullptr in the 115 // case of CPU.) 116 StepExecutor(const ExecutionBuilder* executionBuilder, 117 const ModelBuilder* model, 118 sp<IDevice> driver, sp<IPreparedModel> preparedModel); 119 120 // Map inputs and outputs from ExecutionBuilder to StepExecutor, 121 // in the case where we have a single-"step" execution (i.e., the executor 122 // is executing the entire model from the ExecutionBuilder). 123 void mapInputsAndOutputsTrivially(); 124 125 // Map inputs and outputs from ExecutionBuilder to StepExecutor, 126 // one at a time. Note that these are input/output indexes, not 127 // operand indexes. mapInput(uint32_t builderIndex,uint32_t executorIndex)128 void mapInput(uint32_t builderIndex, uint32_t executorIndex) { 129 mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex], 130 &mInputs[executorIndex]); 131 } mapOutput(uint32_t builderIndex,uint32_t executorIndex)132 void mapOutput(uint32_t builderIndex, uint32_t executorIndex) { 133 mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex], 134 &mOutputs[executorIndex]); 135 } 136 137 // The input or output is assumed to have the size of the 138 // corresponding operand. setInputFromTemporaryMemory(uint32_t inputIndex,const Memory * memory,uint32_t offset)139 int setInputFromTemporaryMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) { 140 return setInputOrOutputFromTemporaryMemory(mModel->getInputOperand(inputIndex), 141 memory, offset, 142 &mInputs.at(inputIndex)); 143 } setOutputFromTemporaryMemory(uint32_t outputIndex,const Memory * memory,uint32_t offset)144 int setOutputFromTemporaryMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) { 145 return setInputOrOutputFromTemporaryMemory(mModel->getOutputOperand(outputIndex), 146 memory, offset, 147 &mOutputs.at(outputIndex)); 148 } 149 150 // Executes using the (driver, preparedModel) specified at construction time. 151 int startCompute(sp<ExecutionCallback>* synchronizationCallback); 152 153 // Executes using the CPU, regardless of the (driver, 154 // preparedModel) specified at construction time. 155 int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback); 156 isCpu()157 bool isCpu() const { return mDriver == nullptr; } 158 159 private: 160 int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory); 161 int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback); 162 163 void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput, 164 ModelArgumentInfo* executorInputOrOutput); 165 166 int setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand, 167 const Memory* memory, uint32_t offset, 168 ModelArgumentInfo* inputOrOutputInfo); 169 170 // describes the full (possibly multiple-"step") execution 171 const ExecutionBuilder* mExecutionBuilder; 172 173 // model to be executed on the executor, in both original and 174 // compiled forms; and device on which to execute it 175 const ModelBuilder* mModel; 176 sp<IDevice> mDriver; // nullptr if CPU execution 177 sp<IPreparedModel> mPreparedModel; // nullptr if CPU execution or if bypassing ExecutionPlan 178 179 // The information we'll send to the driver about the inputs and outputs. 180 // Note that we build this in two steps: 181 // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element. 182 // If set from a pointer, don't set the location in the RequestArgument but store it 183 // instead in mInputBuffers or mOutputBuffers. 184 // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for 185 // the m*Buffers entries. Copy the input values into the shared memory. 186 // We do this to avoid creating a lot of shared memory objects if we have a lot of 187 // parameters specified via pointers. We also avoid copying in the case where 188 // some of the nodes will interpreted on the CPU anyway. 189 std::vector<ModelArgumentInfo> mInputs; 190 std::vector<ModelArgumentInfo> mOutputs; 191 MemoryTracker mMemories; 192 }; 193 194 } // namespace nn 195 } // namespace android 196 197 #endif // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H 198