/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
#define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"

#include <unordered_map>
#include <vector>

using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback;
using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback;

namespace android {
namespace nn {

class CompilationBuilder;
class ExecutionPlan;
class Memory;
class ModelBuilder;
class StepExecutor;

// TODO move length out of DataLocation
struct ModelArgumentInfo {
    // Whether the argument was specified as being in a Memory, as a pointer,
    // has no value, or has not been specified.
    // If POINTER then:
    //   locationAndLength.length is valid.
    //   dimensions is valid.
    //   buffer is valid.
    // If MEMORY then:
    //   locationAndLength.{poolIndex, offset, length} is valid.
    //   dimensions is valid.
    enum { POINTER, MEMORY, HAS_NO_VALUE, UNSPECIFIED } state = UNSPECIFIED;
    DataLocation locationAndLength;
    std::vector<uint32_t> dimensions;
    void* buffer;

    int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type,
                       void* buffer, uint32_t length);
    int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
                      uint32_t poolIndex, uint32_t offset, uint32_t length);
    int setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex, uint32_t offset);
    int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType);
};

class ExecutionBuilder {
    friend class StepExecutor;

public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    const ModelBuilder* getModel() const { return mModel; }

private:
    const ModelBuilder* mModel;
    const ExecutionPlan* mPlan;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
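
// A minimal usage sketch of the builder API above (illustrative only: how the
// CompilationBuilder is obtained, the model's operand layout, and all error
// handling are assumed/elided here):
//
//     CompilationBuilder* compilation = ...;  // a finished compilation of a model
//     ExecutionBuilder execution(compilation);
//
//     float in[4] = {0, 1, 2, 3};
//     float out[4];
//     // Passing nullptr for the type means "use the operand type from the model".
//     execution.setInput(0, nullptr, in, sizeof(in));
//     execution.setOutput(0, nullptr, out, sizeof(out));
//
//     sp<ExecutionCallback> callback;
//     execution.startCompute(&callback);  // launches asynchronously
//     callback->wait();                   // blocks until the execution finishes
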
// class StepExecutor is used to execute a single "step" in a
// potentially multiple-step execution process.  The graph associated
// with that step is executed in its entirety on a single device (or
// on the CPU).
class StepExecutor {
public:
    // executionBuilder
    //     Describes the full (possibly multiple-"step") execution.
    // model
    //     The model to be executed by the executor.  Possibly a
    //     submodel of the model from executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device.  (Both are nullptr in the
    //     case of CPU.)
    StepExecutor(const ExecutionBuilder* executionBuilder, const ModelBuilder* model,
                 sp<IDevice> driver, sp<IPreparedModel> preparedModel);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time.  Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex], &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex], &mOutputs[executorIndex]);
    }

    // The input or output is assumed to have the size of the
    // corresponding operand.
    int setInputFromTemporaryMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getInputOperand(inputIndex), memory,
                                                   offset, &mInputs.at(inputIndex));
    }
    int setOutputFromTemporaryMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getOutputOperand(outputIndex), memory,
                                                   offset, &mOutputs.at(outputIndex));
    }

    // Executes using the (driver, preparedModel) specified at construction time.
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    // Executes using the CPU, regardless of the (driver,
    // preparedModel) specified at construction time.
    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);

    bool isCpu() const { return mDriver == nullptr; }

private:
    int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
    int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback);

    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    int setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
                                            const Memory* memory, uint32_t offset,
                                            ModelArgumentInfo* inputOrOutputInfo);

    // Describes the full (possibly multiple-"step") execution.
    const ExecutionBuilder* mExecutionBuilder;

    // Model to be executed by this executor, in both original and
    // compiled forms, and the device on which to execute it.
    const ModelBuilder* mModel;
    sp<IDevice> mDriver;                // nullptr if CPU execution
    sp<IPreparedModel> mPreparedModel;  // nullptr if CPU execution or if bypassing ExecutionPlan

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
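
// A sketch of the pointer-packing strategy described in the mInputs/mOutputs
// comment above.  This illustrates the idea only; it is not the actual
// definition of allocatePointerArgumentsToPool() from ExecutionBuilder.cpp,
// and it omits alignment and overflow checks.  Each POINTER-state argument is
// assigned an offset in one shared pool, so N pointer-specified arguments
// cost a single Memory object rather than N.
//
//     int StepExecutor::allocatePointerArgumentsToPool(
//             std::vector<ModelArgumentInfo>* args, Memory* memory) {
//         uint32_t poolIndex = mMemories.size();  // index of the pool we are about to add
//         uint32_t total = 0;
//         for (ModelArgumentInfo& info : *args) {
//             if (info.state == ModelArgumentInfo::POINTER) {
//                 DataLocation& loc = info.locationAndLength;
//                 loc.poolIndex = poolIndex;
//                 loc.offset = total;  // pack arguments back to back
//                 total += loc.length;
//             }
//         }
//         if (total > 0) {
//             memory->create(total);  // assumed allocation entry point on Memory
//             mMemories.add(memory);
//         }
//         return ANEURALNETWORKS_NO_ERROR;
//     }
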
}  // namespace nn
}  // namespace android

#endif  // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H