/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
#define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H

#include "HalInterfaces.h"
#include "OperationResolver.h"
#include "OperationsUtils.h"
#include "Utils.h"

#include <android-base/macros.h>
#include <ui/GraphicBuffer.h>

#include <algorithm>
#include <optional>
#include <vector>

namespace android {
namespace nn {

// Information about each operand that we maintain during execution and that
// may change as execution proceeds.
struct RunTimeOperandInfo {
    // TODO Storing the type here is redundant, as it won't change during execution.
    OperandType type;

    // The type and dimensions of the operand. The dimensions can
    // change at runtime. We include the type because it's useful
    // to pass together with the dimensions to the functions implementing
    // the operators.
    //
    // A dimension of zero has a different meaning for different operands at different stages:
    // - Model inputs:
    //     * Specified in the model: implies "dynamic"; the dimensions must be
    //       fully specified in the request.
    //     * Specified in the request: illegal.
    // - Constant operands: illegal.
    // - Model outputs and internal operands:
    //     * Before evaluation: the extent is unknown and is to be deduced from
    //       execution.
    //     * After evaluation:
    //         - If isSufficient() reports true: the tensor is zero-sized.
    //         - Otherwise: the extent is still unknown.
    std::vector<uint32_t> dimensions;

    float scale;
    int32_t zeroPoint;
    // Where the operand's data is stored. Check the corresponding
    // location information in the model to figure out if this points
    // to memory we have allocated for a temporary operand.
    uint8_t* buffer;
    // The length of the buffer.
    uint32_t length;
    // Whether this is a temporary variable, a model input, a constant, etc.
    OperandLifeTime lifetime;
    // Keeps track of how many operations have yet to make use
    // of this temporary variable. When the count is decremented to 0,
    // we free the buffer. For non-temporary variables, this count is
    // always 0.
    uint32_t numberOfUsesLeft;

    Operand::ExtraParams extraParams;

    Shape shape() const {
        return {
                .type = type,
                .dimensions = dimensions,
                .scale = scale,
                .offset = zeroPoint,
                .extraParams = extraParams,
        };
    }

    bool isSufficient() const {
        if (isExtensionOperandType(type)) {
            // We don't know the sizes of extension types.
            return true;
        }
        return length >= nonExtensionOperandSizeOfData(type, dimensions);
    }
};
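
// Usage sketch for the accessors above (illustrative only; "info" is assumed
// to be a fully initialized RunTimeOperandInfo):
//
//   Shape shape = info.shape();   // type + dimensions + quantization parameters
//   if (info.isSufficient()) {
//       // info.buffer is large enough for the data that "shape" describes.
//   }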

// Used to keep a pointer to each of the memory pools.
//
// RunTimePoolInfo references a region of memory. Other RunTimePoolInfo objects
// may reference the same region of memory by either:
// (1) copying an existing RunTimePoolInfo object, or
// (2) creating multiple RunTimePoolInfo objects from the same memory resource
//     (e.g., "createFromHidlMemory" or "createFromExistingBuffer").
//
// If the underlying region of memory is mapped by "createFromHidlMemory", the
// mapping will be sustained until it is no longer referenced by any
// RunTimePoolInfo objects.
class RunTimePoolInfo {
   public:
    static std::optional<RunTimePoolInfo> createFromHidlMemory(const hidl_memory& hidlMemory);
    static RunTimePoolInfo createFromExistingBuffer(uint8_t* buffer);

    uint8_t* getBuffer() const;
    bool update() const;
    hidl_memory getHidlMemory() const;

   private:
    class RunTimePoolInfoImpl;
    RunTimePoolInfo(const std::shared_ptr<const RunTimePoolInfoImpl>& impl);

    std::shared_ptr<const RunTimePoolInfoImpl> mImpl;
};

bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools);
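
// Usage sketch (a minimal, hypothetical example; "request" and "scratch" are
// assumed to come from the caller):
//
//   std::vector<RunTimePoolInfo> requestPoolInfos;
//   if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) {
//       // At least one pool could not be mapped.
//   }
//   uint8_t* scratch = ...;  // caller-owned buffer, not mapped by this class
//   RunTimePoolInfo scratchPool = RunTimePoolInfo::createFromExistingBuffer(scratch);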

// This class is used to execute a model on the CPU.
class CpuExecutor {
   public:
    // This constructor allows clients of CpuExecutor to provide custom CPU
    // operation implementations. It is used by a sample driver to test
    // extension support.
    //
    // Note that it is not possible to provide custom CPU implementations for
    // non-OperationResolver operations (b/124041202).
    //
    // The operation resolver must outlive the executor.
    explicit CpuExecutor(const IOperationResolver* operationResolver)
        : mOperationResolver(operationResolver) {}

    CpuExecutor() : CpuExecutor(BuiltinOperationResolver::get()) {}

    // Executes the model. The results will be stored at the locations
    // specified in the request.
    // The model must outlive the executor. We prevent it from being modified
    // while this is executing.
    int run(const Model& model, const Request& request,
            const std::vector<RunTimePoolInfo>& modelPoolInfos,
            const std::vector<RunTimePoolInfo>& requestPoolInfos);

    const std::vector<OutputShape>& getOutputShapes() const {
        CHECK(mFinished) << "getOutputShapes() called on an unfinished CpuExecutor.";
        return mOutputShapes;
    }

   private:
    bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                               const std::vector<RunTimePoolInfo>& requestPoolInfos);
    // Runs one operation of the graph.
    int executeOperation(const Operation& entry);
    // Decrements the usage count for the listed operands, and frees the memory
    // allocated for any temporary variable whose count reaches zero.
    void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);

    // Frees the memory allocated for any temporary variable, and sets the
    // output operand shapes to be returned to the runtime.
    void finish(int result);

    // The model and the request that we'll execute. Only valid while run()
    // is being executed.
    const Model* mModel = nullptr;
    const Request* mRequest = nullptr;

    // Runtime information about all the operands. The dimensions are copied
    // from the model, as they may be modified when we run the operations.
    // Since we make a full copy, the indexes used in the operand descriptions
    // stay valid.
    std::vector<RunTimeOperandInfo> mOperands;

    // The output operand shapes to be returned to the runtime.
    std::vector<OutputShape> mOutputShapes;

    // Whether execution is finished and mOutputShapes is ready.
    bool mFinished = false;

    const IOperationResolver* mOperationResolver;
};
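
// Usage sketch (assumes "model", "request", and the mapped pool vectors have
// been prepared as shown above):
//
//   CpuExecutor executor;
//   int n = executor.run(model, request, modelPoolInfos, requestPoolInfos);
//   if (n == ANEURALNETWORKS_NO_ERROR) {
//       const std::vector<OutputShape>& shapes = executor.getOutputShapes();
//       // Report the shapes back to the runtime.
//   }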

// Class for setting reasonable OpenMP threading settings. (OpenMP is used by
// the Eigen matrix library.)
//
// Currently sets a low blocktime: the time OpenMP threads busy-wait for more
// work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577.
// The default is 200ms; we set it to 20ms here (see b/109645291). This keeps
// the cores enabled throughout inference computation without too much extra
// power consumption afterwards.
//
// The OpenMP settings are thread-local (applying only to worker threads formed
// from that thread); see https://software.intel.com/en-us/node/522688 and
// http://lists.llvm.org/pipermail/openmp-dev/2016-July/001432.html. This class
// ensures that within the scope in which an object is instantiated we use the
// right settings (scopes may be nested), as long as no other library changes
// them. (Note that in current NNAPI usage only one instance is used in the
// CpuExecutor thread.)
//
// TODO(mikie): consider also setting the number of threads used. Using as many
// threads as there are cores results in more variable performance: if we don't
// get all cores for our threads, the latency is doubled as we wait for one core
// to do twice the amount of work. Reality is complicated though, as not all
// cores are the same. The decision should be based on benchmarking against a
// representative set of workloads and devices. I'm keeping the code here for
// reference.
//
// b/109953668: OpenMP is disabled unless NNAPI_OPENMP is defined.
#ifdef NNAPI_OPENMP
class ScopedOpenmpSettings {
   public:
    ScopedOpenmpSettings();
    ~ScopedOpenmpSettings();
    DISALLOW_COPY_AND_ASSIGN(ScopedOpenmpSettings);

   private:
    int mBlocktimeInitial;
#if NNAPI_LIMIT_CPU_THREADS
    int mMaxThreadsInitial;
#endif
};
#endif  // NNAPI_OPENMP
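
// Usage sketch (only meaningful when built with NNAPI_OPENMP; the computation
// inside the scope is hypothetical):
//
//   {
//       ScopedOpenmpSettings openMpSettings;  // lowers the OpenMP blocktime
//       // ... run Eigen/OpenMP-backed kernels here ...
//   }   // previous settings are restored when the object goes out of scope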

namespace {

template <typename T>
T getScalarData(const RunTimeOperandInfo& info) {
    // TODO: Check that the buffer is at least as long as the size of the data.
    T* data = reinterpret_cast<T*>(info.buffer);
    return data[0];
}
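
// Usage sketch (assumes the operand holds an INT32 scalar; the input index is
// illustrative):
//
//   int32_t axis = getScalarData<int32_t>(operands[operation.inputs[1]]);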

inline bool IsNullInput(const RunTimeOperandInfo* input) {
    return input->lifetime == OperandLifeTime::NO_VALUE;
}

inline int NumInputsWithValues(const Operation& operation,
                               std::vector<RunTimeOperandInfo>& operands) {
    const std::vector<uint32_t>& inputs = operation.inputs;
    return std::count_if(inputs.begin(), inputs.end(),
                         [&operands](uint32_t i) { return !IsNullInput(&operands[i]); });
}

inline int NumOutputs(const Operation& operation) {
    return operation.outputs.size();
}

inline size_t NumDimensions(const RunTimeOperandInfo* operand) {
    return operand->shape().dimensions.size();
}

inline uint32_t SizeOfDimension(const RunTimeOperandInfo* operand, int i) {
    return operand->shape().dimensions[i];
}

inline RunTimeOperandInfo* GetInput(const Operation& operation,
                                    std::vector<RunTimeOperandInfo>& operands, int index) {
    return &operands[operation.inputs[index]];
}

inline RunTimeOperandInfo* GetOutput(const Operation& operation,
                                     std::vector<RunTimeOperandInfo>& operands, int index) {
    return &operands[operation.outputs[index]];
}
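
// Usage sketch for the helpers above, as they might appear inside an operation
// implementation (the indices and dimension checks are illustrative):
//
//   RunTimeOperandInfo* input = GetInput(operation, operands, 0);
//   RunTimeOperandInfo* output = GetOutput(operation, operands, 0);
//   if (!IsNullInput(input) && NumDimensions(input) == 4) {
//       uint32_t batches = SizeOfDimension(input, 0);
//       // ... compute into output->buffer ...
//   }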

}  // anonymous namespace

}  // namespace nn
}  // namespace android

#endif  // ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H