/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
#define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"

#include <unordered_map>
#include <vector>

using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback;
using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback;

namespace android {
namespace nn {

class CompilationBuilder;
class ExecutionPlan;
class Memory;
class ModelBuilder;
class StepExecutor;

// TODO move length out of DataLocation
struct ModelArgumentInfo {
    // Whether the argument was specified as being in a Memory, as a pointer,
    // as having no value, or has not yet been specified.
    // If POINTER then:
    //   locationAndLength.length is valid.
    //   dimensions is valid.
    //   buffer is valid.
    // If MEMORY then:
    //   locationAndLength.location.{poolIndex, offset, length} is valid.
    //   dimensions is valid.
    enum { POINTER, MEMORY, HAS_NO_VALUE, UNSPECIFIED } state = UNSPECIFIED;
    DataLocation locationAndLength;
    std::vector<uint32_t> dimensions;
    void* buffer;

    int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer,
                       uint32_t length);
    int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
                      uint32_t poolIndex, uint32_t offset, uint32_t length);
    int setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex, uint32_t offset);
    int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType);
};
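
// A minimal usage sketch of ModelArgumentInfo (illustrative only; "operand",
// "fp32Type", and "userBuffer" below are hypothetical placeholders, not part
// of this header):
//
//   ModelArgumentInfo info;
//   // Caller-owned buffer: state becomes POINTER;
//   // locationAndLength.length, dimensions, and buffer become valid.
//   info.setFromPointer(operand, &fp32Type, userBuffer, sizeof(float) * 4);
//
//   // Memory-pool-backed argument: state becomes MEMORY;
//   // locationAndLength.location.{poolIndex, offset, length} becomes valid.
//   info.setFromMemory(operand, &fp32Type, /*poolIndex=*/0, /*offset=*/0,
//                      /*length=*/sizeof(float) * 4);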

class ExecutionBuilder {
    friend class StepExecutor;
public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    const ModelBuilder* getModel() const { return mModel; }

private:
    const ModelBuilder* mModel;
    const ExecutionPlan* mPlan;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
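
// Sketch of the public NNAPI flow that lands in ExecutionBuilder (assumes the
// NeuralNetworks.h C API; "compilation", "in", and "out" are hypothetical,
// and error checking is omitted):
//
//   ANeuralNetworksExecution* execution = nullptr;
//   ANeuralNetworksExecution_create(compilation, &execution);
//   // Each call below records a ModelArgumentInfo (step 1 above).
//   ANeuralNetworksExecution_setInput(execution, 0, nullptr, in, sizeof in);
//   ANeuralNetworksExecution_setOutput(execution, 0, nullptr, out, sizeof out);
//   // startCompute performs step 2 (pooling pointer arguments into shared
//   // memory as needed) and launches the asynchronous computation.
//   ANeuralNetworksEvent* event = nullptr;
//   ANeuralNetworksExecution_startCompute(execution, &event);
//   ANeuralNetworksEvent_wait(event);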

// class StepExecutor is used to execute a single "step" in a
// potentially multiple step execution process.  The graph associated
// with that step is executed in its entirety on a single device (or
// on the CPU).
class StepExecutor {
public:
    // executionBuilder
    //     Describes the full (possibly multiple-"step") execution.
    // model
    //     The model to be executed by the executor.  Possibly a
    //     submodel of the model from executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device.  (Both are nullptr in the
    //     case of CPU.)
    StepExecutor(const ExecutionBuilder* executionBuilder,
                 const ModelBuilder* model,
                 sp<IDevice> driver, sp<IPreparedModel> preparedModel);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time.  Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex],
                         &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mOutputs[executorIndex]);
    }

    // The input or output is assumed to have the size of the
    // corresponding operand.
    int setInputFromTemporaryMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getInputOperand(inputIndex),
                                                   memory, offset,
                                                   &mInputs.at(inputIndex));
    }
    int setOutputFromTemporaryMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getOutputOperand(outputIndex),
                                                   memory, offset,
                                                   &mOutputs.at(outputIndex));
    }

    // Executes using the (driver, preparedModel) specified at construction time.
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    // Executes using the CPU, regardless of the (driver,
    // preparedModel) specified at construction time.
    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);

    bool isCpu() const { return mDriver == nullptr; }

private:
    int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
    int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback);

    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    int setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
                                            const Memory* memory, uint32_t offset,
                                            ModelArgumentInfo* inputOrOutputInfo);

    // describes the full (possibly multiple-"step") execution
    const ExecutionBuilder* mExecutionBuilder;

    // model to be executed on the executor, in both original and
    // compiled forms; and device on which to execute it
    const ModelBuilder* mModel;
    sp<IDevice> mDriver;                // nullptr if CPU execution
    sp<IPreparedModel> mPreparedModel;  // nullptr if CPU execution or if bypassing ExecutionPlan

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
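
// Sketch of driving a single-"step" execution through StepExecutor
// (illustrative; "builder", "model", "device", and "prepared" are
// hypothetical locals, and error handling is omitted):
//
//   StepExecutor executor(builder, model, device, prepared);
//   // The entire model runs in this step, so builder and executor
//   // inputs/outputs correspond one-to-one.
//   executor.mapInputsAndOutputsTrivially();
//   sp<ExecutionCallback> callback;
//   int n = executor.isCpu() ? executor.startComputeOnCpu(&callback)
//                            : executor.startCompute(&callback);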

} // namespace nn
} // namespace android

#endif // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H