1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "CpuExecutor"
18
19 #include "CpuExecutor.h"
20
21 #include "NeuralNetworks.h"
22 #include "OperationResolver.h"
23 #include "Operations.h"
24 #include "OperationsUtils.h"
25 #include "Tracing.h"
26
27 #include "Eigen/Core"
28 // b/109953668, disable OpenMP
29 #ifdef NNAPI_OPENMP
30 #include <omp.h>
31 #endif // NNAPI_OPENMP
32 #include <android/hardware_buffer.h>
33 #include <sys/mman.h>
34
35 namespace android {
36 namespace nn {
37
38 namespace {
39
40 class OperationExecutionContext : public IOperationExecutionContext {
41 DISALLOW_IMPLICIT_CONSTRUCTORS(OperationExecutionContext);
42
43 public:
44 OperationExecutionContext(const Operation* operation, RunTimeOperandInfo* operands)
45 : operation(operation), operands(operands) {}
46
47 uint32_t getNumInputs() const override;
48 OperandType getInputType(uint32_t index) const override;
49 Shape getInputShape(uint32_t index) const override;
50 const void* getInputBuffer(uint32_t index) const override;
51 const Operand::ExtraParams getInputExtraParams(uint32_t index) const override;
52
53 uint32_t getNumOutputs() const override;
54 OperandType getOutputType(uint32_t index) const override;
55 Shape getOutputShape(uint32_t index) const override;
56 void* getOutputBuffer(uint32_t index) override;
57
58 // Return false on failure and store the result code.
59 // Use getResultCode() to retrieve it at the end of the operation execution.
60 bool setOutputShape(uint32_t index, const Shape& shape) override;
61 int getResultCode() const;
62
63 bool isOmittedInput(uint32_t index) const override;
64 bool isOmittedOutput(uint32_t index) const override;
65
66 // Returns false if any input or output is omitted, i.e. has a lifetime of NO_VALUE.
67 bool checkNoOmittedOperand() const;
68 // Returns false if any input has a dimension of size 0.
69 bool checkNoZeroSizedInput() const;
70
71 private:
72 const RunTimeOperandInfo* getInputInfo(uint32_t index) const;
73 const RunTimeOperandInfo* getOutputInfo(uint32_t index) const;
74 RunTimeOperandInfo* getOutputInfo(uint32_t index);
75
76 const Operation* operation;
77 RunTimeOperandInfo* operands;
78
79 int result = ANEURALNETWORKS_NO_ERROR;
80 };
81
82 const RunTimeOperandInfo* OperationExecutionContext::getInputInfo(uint32_t index) const {
83 CHECK(index < operation->inputs.size());
84 return &operands[operation->inputs[index]];
85 }
86
87 const RunTimeOperandInfo* OperationExecutionContext::getOutputInfo(uint32_t index) const {
88 CHECK(index < operation->outputs.size());
89 return &operands[operation->outputs[index]];
90 }
91
92 RunTimeOperandInfo* OperationExecutionContext::getOutputInfo(uint32_t index) {
93 CHECK(index < operation->outputs.size());
94 return &operands[operation->outputs[index]];
95 }
96
97 OperandType OperationExecutionContext::getInputType(uint32_t index) const {
98 return getInputInfo(index)->type;
99 }
100
101 Shape OperationExecutionContext::getInputShape(uint32_t index) const {
102 return getInputInfo(index)->shape();
103 }
104
105 const void* OperationExecutionContext::getInputBuffer(uint32_t index) const {
106 return getInputInfo(index)->buffer;
107 }
108
109 const Operand::ExtraParams OperationExecutionContext::getInputExtraParams(uint32_t index) const {
110 return getInputInfo(index)->extraParams;
111 }
112
113 OperandType OperationExecutionContext::getOutputType(uint32_t index) const {
114 return getOutputInfo(index)->type;
115 }
116
117 Shape OperationExecutionContext::getOutputShape(uint32_t index) const {
118 return getOutputInfo(index)->shape();
119 }
120
121 void* OperationExecutionContext::getOutputBuffer(uint32_t index) {
122 return getOutputInfo(index)->buffer;
123 }
124
125 uint32_t OperationExecutionContext::getNumInputs() const {
126 return operation->inputs.size();
127 }
128
129 uint32_t OperationExecutionContext::getNumOutputs() const {
130 return operation->outputs.size();
131 }
132
133 int OperationExecutionContext::getResultCode() const {
134 return result;
135 }
136
137 // TODO: Return error code directly once we've fully integrated OperationResolver with all ops.
138 // Updates the RunTimeOperandInfo with the newly calculated shape.
139 // Allocate the buffer if we need to.
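// For MODEL_OUTPUT operands the calculated Shape's type, quantization parameters,
// and extraParams must match what the caller declared; for TEMPORARY_VARIABLE
// operands a buffer is allocated here once the combined dimensions are known.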
140 bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape, int* result) {
141 // For user-provided model output operands, the parameters must match the Shape
142 // calculated from the preparation step.
143 if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
144 if (info->type != shape.type) {
145 LOG(ERROR) << "Invalid type for model output";
146 *result = ANEURALNETWORKS_OP_FAILED;
147 return false;
148 }
149 if (info->type == OperandType::TENSOR_QUANT8_ASYMM) {
150 if (info->scale != shape.scale) {
151 LOG(ERROR) << "Invalid scale for model output";
152 *result = ANEURALNETWORKS_OP_FAILED;
153 return false;
154 }
155 if (info->zeroPoint != shape.offset) {
156 LOG(ERROR) << "Invalid zeroPoint for model output";
157 *result = ANEURALNETWORKS_OP_FAILED;
158 return false;
159 }
160 }
161 if (info->extraParams != shape.extraParams) {
162 LOG(ERROR) << "Invalid extraParams for model output";
163 *result = ANEURALNETWORKS_OP_FAILED;
164 return false;
165 }
166 }
167
168 std::vector<uint32_t> combined;
169 if (!combineDimensions(shape.dimensions, info->dimensions, &combined)) {
170 LOG(ERROR) << "Invalid dimensions for model operand";
171 *result = ANEURALNETWORKS_OP_FAILED;
172 return false;
173 }
174 info->dimensions = combined;
175 info->type = shape.type;
176 info->scale = shape.scale;
177 info->zeroPoint = shape.offset;
178 info->extraParams = shape.extraParams;
179
180 // Allocate the buffer only if the combined dimension is fully specified
181 if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
182 if (isExtensionOperandType(info->type)) {
183 LOG(ERROR) << "Cannot allocate a temporary variable of an extension type";
184 *result = ANEURALNETWORKS_OP_FAILED;
185 return false;
186 }
187 uint32_t length = nonExtensionOperandSizeOfData(info->type, info->dimensions);
188 if (length > 0) {
189 info->buffer = new uint8_t[length];
190 if (info->buffer == nullptr) {
191 *result = ANEURALNETWORKS_OUT_OF_MEMORY;
192 return false;
193 }
194 info->length = length;
195 }
196 }
197 if (!info->isSufficient()) {
198 uint32_t length = nonExtensionOperandSizeOfData(info->type, info->dimensions);
199 LOG(ERROR) << "Insufficient size for model operand: require = " << length
200 << ", provided = " << info->length;
201 *result = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
202 return false;
203 }
204 *result = ANEURALNETWORKS_NO_ERROR;
205 return true;
206 }
207
208 bool OperationExecutionContext::setOutputShape(uint32_t index, const Shape& shape) {
209 return setInfoAndAllocateIfNeeded(getOutputInfo(index), shape, &result);
210 }
211
212 bool OperationExecutionContext::isOmittedInput(uint32_t index) const {
213 return getInputInfo(index)->lifetime == OperandLifeTime::NO_VALUE;
214 }
215
216 bool OperationExecutionContext::isOmittedOutput(uint32_t index) const {
217 return getOutputInfo(index)->lifetime == OperandLifeTime::NO_VALUE;
218 }
219
220 bool OperationExecutionContext::checkNoOmittedOperand() const {
221 for (uint32_t i = 0; i < operation->inputs.size(); i++) {
222 NN_RET_CHECK(!isOmittedInput(i)) << getOperationName(operation->type) << " input operand "
223 << i << " is required but missing.";
224 }
225 for (uint32_t i = 0; i < operation->outputs.size(); i++) {
226 NN_RET_CHECK(!isOmittedOutput(i)) << getOperationName(operation->type) << " output operand "
227 << i << " is required but missing.";
228 }
229 return true;
230 }
231
232 bool OperationExecutionContext::checkNoZeroSizedInput() const {
233 for (uint32_t i = 0; i < operation->inputs.size(); i++) {
234 if (isOmittedInput(i)) continue;
235 for (uint32_t j = 0; j < getInputInfo(i)->dimensions.size(); j++) {
236 NN_RET_CHECK_NE(getInputInfo(i)->dimensions[j], 0)
237 << getOperationName(operation->type)
238 << " does not support zero-sized tensor, but input " << i << " dimension " << j
239 << " is 0.";
240 }
241 }
242 return true;
243 }
244
245 } // namespace
246
247 // Used to keep a pointer to a memory pool.
248 //
249 // In the case of an "mmap_fd" pool, owns the mmap region
250 // returned by getBuffer() -- i.e., that region goes away
251 // when the RunTimePoolInfo is destroyed or is assigned to.
252 class RunTimePoolInfo::RunTimePoolInfoImpl {
253 public:
254 RunTimePoolInfoImpl(const hidl_memory& hidlMemory, uint8_t* buffer, const sp<IMemory>& memory,
255 const sp<GraphicBuffer>& graphicBuffer);
256
257 // rule of five...
258 ~RunTimePoolInfoImpl();
259 RunTimePoolInfoImpl(const RunTimePoolInfoImpl&) = delete;
260 RunTimePoolInfoImpl(RunTimePoolInfoImpl&&) noexcept = delete;
261 RunTimePoolInfoImpl& operator=(const RunTimePoolInfoImpl&) = delete;
262 RunTimePoolInfoImpl& operator=(RunTimePoolInfoImpl&&) noexcept = delete;
263
264 uint8_t* getBuffer() const { return mBuffer; }
265
266 bool update() const;
267
268 hidl_memory getHidlMemory() const { return mHidlMemory; }
269
270 private:
271 const hidl_memory mHidlMemory; // always used
272 uint8_t* const mBuffer = nullptr; // always used
273 const sp<IMemory> mMemory; // only used when hidlMemory.name() == "ashmem"
274 const sp<GraphicBuffer>
275 mGraphicBuffer; // only used when hidlMemory.name() == "hardware_buffer_blob"
276 };
277
278 RunTimePoolInfo::RunTimePoolInfoImpl::RunTimePoolInfoImpl(const hidl_memory& hidlMemory,
279 uint8_t* buffer,
280 const sp<IMemory>& memory,
281 const sp<GraphicBuffer>& graphicBuffer)
282 : mHidlMemory(hidlMemory), mBuffer(buffer), mMemory(memory), mGraphicBuffer(graphicBuffer) {}
283
284 RunTimePoolInfo::RunTimePoolInfoImpl::~RunTimePoolInfoImpl() {
285 if (mBuffer == nullptr) {
286 return;
287 }
288
289 const std::string memType = mHidlMemory.name();
290 if (memType == "ashmem") {
291 // nothing to do
292 } else if (memType == "mmap_fd") {
293 const size_t size = mHidlMemory.size();
294 if (munmap(mBuffer, size)) {
295 LOG(ERROR) << "RunTimePoolInfoImpl::~RunTimePoolInfoImpl(): Can't munmap";
296 }
297 } else if (memType == "hardware_buffer_blob") {
298 mGraphicBuffer->unlock();
299 } else if (memType == "") {
300 // Represents a POINTER argument; nothing to do
301 } else {
302 LOG(ERROR) << "RunTimePoolInfoImpl::~RunTimePoolInfoImpl(): unsupported hidl_memory type";
303 }
304 }
305
306 // Makes sure the output data is correctly updated after execution.
307 bool RunTimePoolInfo::RunTimePoolInfoImpl::update() const {
308 const std::string memType = mHidlMemory.name();
309 if (memType == "ashmem") {
310 mMemory->commit();
311 return true;
312 }
313 if (memType == "mmap_fd") {
314 int prot = mHidlMemory.handle()->data[1];
315 if (prot & PROT_WRITE) {
316 const size_t size = mHidlMemory.size();
317 return msync(mBuffer, size, MS_SYNC) == 0;
318 }
319 }
320 // No-op for other types of memory.
321 return true;
322 }
323
324 // TODO: short term, make shared memory mapping and updating a utility function.
325 // TODO: long term, implement mmap_fd as a hidl IMemory service.
326 std::optional<RunTimePoolInfo> RunTimePoolInfo::createFromHidlMemory(
327 const hidl_memory& hidlMemory) {
328 uint8_t* buffer = nullptr;
329 sp<IMemory> memory;
330 sp<GraphicBuffer> graphicBuffer;
331
332 const auto& memType = hidlMemory.name();
333 if (memType == "ashmem") {
334 memory = mapMemory(hidlMemory);
335 if (memory == nullptr) {
336 LOG(ERROR) << "Can't map shared memory.";
337 return std::nullopt;
338 }
339 memory->update();
340 buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
341 if (buffer == nullptr) {
342 LOG(ERROR) << "Can't access shared memory.";
343 return std::nullopt;
344 }
345 } else if (memType == "mmap_fd") {
346 size_t size = hidlMemory.size();
347 int fd = hidlMemory.handle()->data[0];
348 int prot = hidlMemory.handle()->data[1];
349 size_t offset = getSizeFromInts(hidlMemory.handle()->data[2], hidlMemory.handle()->data[3]);
350 buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
351 if (buffer == MAP_FAILED) {
352 LOG(ERROR) << "RunTimePoolInfo::set(): Can't mmap the file descriptor.";
353 return std::nullopt;
354 }
355 } else if (memType == "hardware_buffer_blob") {
356 auto handle = hidlMemory.handle();
357 auto format = AHARDWAREBUFFER_FORMAT_BLOB;
358 auto usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN;
359 const uint32_t width = hidlMemory.size();
360 const uint32_t height = 1; // height is always 1 for BLOB mode AHardwareBuffer.
361 const uint32_t layers = 1; // layers is always 1 for BLOB mode AHardwareBuffer.
362 const uint32_t stride = hidlMemory.size();
363 graphicBuffer = new GraphicBuffer(handle, GraphicBuffer::HandleWrapMethod::CLONE_HANDLE,
364 width, height, format, layers, usage, stride);
365 void* gBuffer = nullptr;
366 int32_t outBytesPerPixel, outBytesPerStride;
367 status_t status =
368 graphicBuffer->lock(usage, &gBuffer, &outBytesPerPixel, &outBytesPerStride);
369 if (status != NO_ERROR) {
370 LOG(ERROR) << "RunTimePoolInfo Can't lock the AHardwareBuffer.";
371 return std::nullopt;
372 }
373 buffer = static_cast<uint8_t*>(gBuffer);
374 } else {
375 LOG(ERROR) << "RunTimePoolInfo::set(): unsupported hidl_memory type";
376 return std::nullopt;
377 }
378
379 const auto impl =
380 std::make_shared<const RunTimePoolInfoImpl>(hidlMemory, buffer, memory, graphicBuffer);
381 return {RunTimePoolInfo(impl)};
382 }
383
384 RunTimePoolInfo RunTimePoolInfo::createFromExistingBuffer(uint8_t* buffer) {
385 const auto impl =
386 std::make_shared<const RunTimePoolInfoImpl>(hidl_memory{}, buffer, nullptr, nullptr);
387 return {impl};
388 }
389
390 RunTimePoolInfo::RunTimePoolInfo(const std::shared_ptr<const RunTimePoolInfoImpl>& impl)
391 : mImpl(impl) {}
392
393 uint8_t* RunTimePoolInfo::getBuffer() const {
394 return mImpl->getBuffer();
395 }
396
397 bool RunTimePoolInfo::update() const {
398 return mImpl->update();
399 }
400
401 hidl_memory RunTimePoolInfo::getHidlMemory() const {
402 return mImpl->getHidlMemory();
403 }
404
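// Illustrative driver-side usage (a sketch only, not taken from this file; the
// request/model objects and the error handling are assumptions):
//
//   std::vector<RunTimePoolInfo> requestPoolInfos;
//   if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) {
//       // handle the mapping failure
//   }
//   CpuExecutor executor;
//   int n = executor.run(model, request, modelPoolInfos, requestPoolInfos);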
405 bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
406 const hidl_vec<hidl_memory>& pools) {
407 CHECK(poolInfos != nullptr);
408 poolInfos->clear();
409 poolInfos->reserve(pools.size());
410 for (const auto& pool : pools) {
411 if (std::optional<RunTimePoolInfo> poolInfo = RunTimePoolInfo::createFromHidlMemory(pool)) {
412 poolInfos->push_back(*poolInfo);
413 } else {
414 LOG(ERROR) << "Could not map pools";
415 poolInfos->clear();
416 return false;
417 }
418 }
419 return true;
420 }
421
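// Reorders a 4-D tensor from NCHW (fromDim = {N, C, H, W}) into NHWC. The element
// written at destination offset n*H*W*C + hw*C + c is read from source offset
// n*C*H*W + c*H*W + hw. For example, with {N, C, H, W} = {1, 2, 2, 2}, source
// elements 0..7 are emitted in the order 0, 4, 1, 5, 2, 6, 3, 7.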
422 template <typename T>
423 inline bool convertToNhwcImpl(T* to, const T* from, const std::vector<uint32_t>& fromDim) {
424 uint32_t spatialSize = fromDim[2] * fromDim[3];
425 for (uint32_t n = 0; n < fromDim[0]; n++) {
426 for (uint32_t hw = 0; hw < spatialSize; hw++) {
427 for (uint32_t c = 0; c < fromDim[1]; c++) {
428 uint32_t fromIndex = n * fromDim[1] * spatialSize + c * spatialSize + hw;
429 *to++ = from[fromIndex];
430 }
431 }
432 }
433 return true;
434 }
435
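// Inverse of convertToNhwcImpl: reorders a 4-D tensor from NHWC
// (fromDim = {N, H, W, C}) back into NCHW by gathering one channel plane at a time.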
436 template <typename T>
437 inline bool convertFromNhwcImpl(T* to, const T* from, const std::vector<uint32_t>& fromDim) {
438 uint32_t spatialSize = fromDim[1] * fromDim[2];
439 for (uint32_t n = 0; n < fromDim[0]; n++) {
440 for (uint32_t c = 0; c < fromDim[3]; c++) {
441 for (uint32_t hw = 0; hw < spatialSize; hw++) {
442 uint32_t fromIndex = n * spatialSize * fromDim[3] + hw * fromDim[3] + c;
443 *to++ = from[fromIndex];
444 }
445 }
446 }
447 return true;
448 }
449
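// Wraps an input operand for an NHWC-only kernel. When data_layout is true the
// source operand is treated as NCHW: a temporary NHWC copy is allocated (owned by
// ptr_guard) and the values are transposed into it. When data_layout is false the
// operand is already NHWC and 'to' simply aliases 'from' without copying.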
450 static bool convertToNhwc(RunTimeOperandInfo& to, const RunTimeOperandInfo& from,
451 std::unique_ptr<uint8_t[]>& ptr_guard, bool data_layout) {
452 int result;
453 if (from.dimensions.size() != 4) {
454 LOG(ERROR) << "Error converting a non-4-D tensor to NHWC layout";
455 return false;
456 }
457 to.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
458 if (data_layout) {
459 // convert dimensions
460 Shape inShape = from.shape();
461 auto& fromDim = from.dimensions;
462 inShape.dimensions = {fromDim[0], fromDim[2], fromDim[3], fromDim[1]};
463 // allocate buffer
464 to.buffer = nullptr;
465 if (!setInfoAndAllocateIfNeeded(&to, inShape, &result)) {
466 return false;
467 }
468 ptr_guard.reset(to.buffer);
469 // convert value
470 if (from.type == OperandType::TENSOR_FLOAT32) {
471 return convertToNhwcImpl<float>(reinterpret_cast<float*>(to.buffer),
472 reinterpret_cast<const float*>(from.buffer), fromDim);
473 } else if (from.type == OperandType::TENSOR_FLOAT16) {
474 return convertToNhwcImpl<_Float16>(reinterpret_cast<_Float16*>(to.buffer),
475 reinterpret_cast<const _Float16*>(from.buffer),
476 fromDim);
477 } else if (from.type == OperandType::TENSOR_QUANT8_ASYMM) {
478 return convertToNhwcImpl<uint8_t>(reinterpret_cast<uint8_t*>(to.buffer),
479 reinterpret_cast<const uint8_t*>(from.buffer),
480 fromDim);
481 } else {
482 LOG(ERROR) << "Unsupported data type";
483 return false;
484 }
485 } else {
486 to = from;
487 }
488 return true;
489 }
490
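// Counterpart of convertToNhwc for outputs: when data_layout is true the NHWC
// result in 'from' is transposed into the NCHW operand 'to' (allocating it if
// needed); otherwise 'to' reuses the buffer of 'from' and only the shape metadata
// is propagated.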
491 static bool convertFromNhwc(RunTimeOperandInfo& to, const RunTimeOperandInfo& from,
492 bool data_layout, int* result) {
493 if (from.dimensions.size() != 4) {
494 LOG(ERROR) << "Error converting a non-4-D tensor from NHWC layout";
495 return false;
496 }
497 if (data_layout) {
498 // convert dimensions
499 Shape outShape = from.shape();
500 auto& fromDim = from.dimensions;
501 outShape.dimensions = {fromDim[0], fromDim[3], fromDim[1], fromDim[2]};
502 // allocate buffer
503 if (!setInfoAndAllocateIfNeeded(&to, outShape, result)) {
504 return false;
505 }
506 // convert value
507 if (from.type == OperandType::TENSOR_FLOAT32) {
508 return convertFromNhwcImpl<float>(reinterpret_cast<float*>(to.buffer),
509 reinterpret_cast<const float*>(from.buffer), fromDim);
510 } else if (from.type == OperandType::TENSOR_FLOAT16) {
511 return convertFromNhwcImpl<_Float16>(reinterpret_cast<_Float16*>(to.buffer),
512 reinterpret_cast<const _Float16*>(from.buffer),
513 fromDim);
514 } else if (from.type == OperandType::TENSOR_QUANT8_ASYMM) {
515 return convertFromNhwcImpl<uint8_t>(reinterpret_cast<uint8_t*>(to.buffer),
516 reinterpret_cast<const uint8_t*>(from.buffer),
517 fromDim);
518 } else {
519 LOG(ERROR) << "Unsupported data type";
520 return false;
521 }
522 } else {
523 Shape outShape = from.shape();
524 to.buffer = from.buffer;
525 to.length = from.length;
526 if (!setInfoAndAllocateIfNeeded(&to, outShape, result)) {
527 return false;
528 }
529 }
530 return true;
531 }
532
533 // Ignore the .pools entry in model and request. This will have been taken care of
534 // by the caller.
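// Executes every operation in model.operations in order, then flushes all memory
// pools so that outputs written through mmap'd or AHardwareBuffer-backed pools
// become visible to the caller.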
535 int CpuExecutor::run(const Model& model, const Request& request,
536 const std::vector<RunTimePoolInfo>& modelPoolInfos,
537 const std::vector<RunTimePoolInfo>& requestPoolInfos) {
538 NNTRACE_CPU(NNTRACE_PHASE_EXECUTION, "run");
539 VLOG(CPUEXE) << "CpuExecutor::run() with request(" << SHOW_IF_DEBUG(toString(request)) << ")";
540
541 // b/109953668, disable OpenMP
542 #ifdef NNAPI_OPENMP
543 ScopedOpenmpSettings openMpSettings;
544 #endif // NNAPI_OPENMP
545
546 mModel = &model;
547 mRequest = &request; // TODO check if mRequest is needed
548 initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
549 // The model has serialized the operations in execution order.
550 for (const auto& operation : model.operations) {
551 int n = executeOperation(operation);
552 if (n != ANEURALNETWORKS_NO_ERROR) {
553 finish(n);
554 return n;
555 }
556 }
557 for (auto& runtimeInfo : modelPoolInfos) {
558 runtimeInfo.update();
559 }
560 for (auto& runtimeInfo : requestPoolInfos) {
561 runtimeInfo.update();
562 }
563 finish(ANEURALNETWORKS_NO_ERROR);
564 VLOG(CPUEXE) << "Completed run normally";
565 return ANEURALNETWORKS_NO_ERROR;
566 }
567
568 bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
569 const std::vector<RunTimePoolInfo>& requestPoolInfos) {
570 VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
571 const size_t count = mModel->operands.size();
572 mOperands.resize(count);
573
574 // Start by setting the runtime info to what's in the model.
575 for (size_t i = 0; i < count; i++) {
576 const Operand& from = mModel->operands[i];
577 RunTimeOperandInfo& to = mOperands[i];
578 to.type = from.type;
579 to.dimensions = from.dimensions;
580 to.scale = from.scale;
581 to.zeroPoint = from.zeroPoint;
582 to.length = from.location.length;
583 to.lifetime = from.lifetime;
584 to.extraParams = from.extraParams;
585 switch (from.lifetime) {
586 case OperandLifeTime::TEMPORARY_VARIABLE:
587 to.buffer = nullptr;
588 to.numberOfUsesLeft = from.numberOfConsumers;
589 break;
590 case OperandLifeTime::CONSTANT_COPY:
591 to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
592 to.numberOfUsesLeft = 0;
593 break;
594 case OperandLifeTime::CONSTANT_REFERENCE: {
595 auto poolIndex = from.location.poolIndex;
596 nnAssert(poolIndex < modelPoolInfos.size());
597 auto& r = modelPoolInfos[poolIndex];
598 to.buffer = r.getBuffer() + from.location.offset;
599 to.numberOfUsesLeft = 0;
600 break;
601 }
602 case OperandLifeTime::MODEL_INPUT:
603 case OperandLifeTime::MODEL_OUTPUT:
604 case OperandLifeTime::NO_VALUE:
605 to.buffer = nullptr;
606 to.numberOfUsesLeft = 0;
607 break;
608 default:
609 nnAssert(false);
610 break;
611 }
612 }
613
614 // Adjust the runtime info for the arguments passed to the model,
615 // modifying the buffer location, and possibly the dimensions.
616 auto updateForArguments = [this, &requestPoolInfos](
617 const std::vector<uint32_t>& indexes,
618 const hidl_vec<RequestArgument>& arguments) {
619 nnAssert(indexes.size() == arguments.size());
620 for (size_t i = 0; i < indexes.size(); i++) {
621 const uint32_t operandIndex = indexes[i];
622 const RequestArgument& from = arguments[i];
623 RunTimeOperandInfo& to = mOperands[operandIndex];
624 if (from.dimensions.size() > 0) {
625 // It's the responsibility of the caller to validate that
626 // from.dimensions only modifies the dimensions that were
627 // unspecified in the model. That's the case in SampleDriver.cpp
628 // with the call to validateRequest().
629 // TODO make sure that's the case for the default CPU path.
630 to.dimensions = from.dimensions;
631 }
632 if (from.hasNoValue) {
633 to.lifetime = OperandLifeTime::NO_VALUE;
634 nnAssert(to.buffer == nullptr);
635 to.length = 0;
636 } else {
637 auto poolIndex = from.location.poolIndex;
638 nnAssert(poolIndex < requestPoolInfos.size());
639 auto& r = requestPoolInfos[poolIndex];
640 to.buffer = r.getBuffer() + from.location.offset;
641 to.length = from.location.length;
642 }
643 }
644 };
645 updateForArguments(mModel->inputIndexes, mRequest->inputs);
646 updateForArguments(mModel->outputIndexes, mRequest->outputs);
647
648 return true;
649 }
650
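// Decrements the use count of each temporary input operand and frees its buffer
// once the count reaches zero. Operands whose count is already zero (constants and
// model inputs/outputs) are left untouched.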
651 void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
652 for (uint32_t i : inputs) {
653 auto& info = mOperands[i];
654 // Check if it's a static or model input/output.
655 if (info.numberOfUsesLeft == 0) {
656 continue;
657 }
658 info.numberOfUsesLeft--;
659 if (info.numberOfUsesLeft == 0 && info.buffer != nullptr) {
660 delete[] info.buffer;
661 info.buffer = nullptr;
662 }
663 }
664 }
665
666 int CpuExecutor::executeOperation(const Operation& operation) {
667 // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
668 const hidl_vec<uint32_t>& ins = operation.inputs;
669 const hidl_vec<uint32_t>& outs = operation.outputs;
670 bool success = false;
671 int result = ANEURALNETWORKS_NO_ERROR;
672
673 // Function to verify that the number of input and output parameters
674 // matches what is expected. Also checks that all the parameters have
675 // values. This function is to be used only for operations that do not
676 // accept optional arguments.
677 // TODO Have a version that works for optional arguments.
678 auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
679 size_t requiredOuts) -> bool {
680 auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
681 const char* type) -> bool {
682 size_t actualCount = indexes.size();
683 if (actualCount != requiredCount) {
684 LOG(ERROR) << getOperationName(operation.type) << ": Invalid number of " << type
685 << " operands. Got " << actualCount << " of " << requiredCount;
686 return false;
687 }
688 for (size_t i = 0; i < actualCount; i++) {
689 if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
690 LOG(ERROR) << getOperationName(operation.type) << " " << type << " operand "
691 << i << " is required but missing.";
692 return false;
693 }
694 }
695 return true;
696 };
697
698 auto verifyNoZeroSizedInputs = [&operation, this](const hidl_vec<uint32_t>& indexes) {
699 for (size_t i = 0; i < indexes.size(); i++) {
700 for (size_t j = 0; j < mOperands[indexes[i]].dimensions.size(); j++) {
701 if (mOperands[indexes[i]].dimensions[j] == 0) {
702 LOG(ERROR) << getOperationName(operation.type)
703 << " does not support zero-sized tensor, but input " << i
704 << " dimension " << j << " is zero.";
705 return false;
706 }
707 }
708 }
709 return true;
710 };
711
712 return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out") &&
713 verifyNoZeroSizedInputs(ins);
714 };
715
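// Dispatch on the operation type. Each case below typically validates its
// parameters, computes the output Shape with the operation's *Prepare() helper,
// allocates the output through setInfoAndAllocateIfNeeded(), and finally invokes
// the matching reference kernel.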
716 switch (operation.type) {
717 case OperationType::OEM_OPERATION: {
718 LOG(ERROR) << "OEM operation not supported for CPU execution";
719 success = false;
720 } break;
721 case OperationType::FLOOR: {
722 if (!allParametersPresent(1, 1)) {
723 return ANEURALNETWORKS_BAD_DATA;
724 }
725 const RunTimeOperandInfo& input = mOperands[ins[0]];
726 RunTimeOperandInfo& output = mOperands[outs[0]];
727 Shape outShape = output.shape();
728
729 if (!floorPrepare(input.shape(), &outShape) ||
730 !setInfoAndAllocateIfNeeded(&output, outShape, &result)) {
731 break;
732 }
733 if (input.type == OperandType::TENSOR_FLOAT32) {
734 success = floorFloat32(reinterpret_cast<const float*>(input.buffer),
735 reinterpret_cast<float*>(output.buffer), outShape);
736 } else if (input.type == OperandType::TENSOR_FLOAT16) {
737 success = floorFloat16(reinterpret_cast<const _Float16*>(input.buffer),
738 reinterpret_cast<_Float16*>(output.buffer), outShape);
739 }
740 } break;
741 case OperationType::DEPTHWISE_CONV_2D: {
742 const size_t inCount = ins.size();
743 if ((inCount != 14 && inCount != 12 && inCount != 11 && inCount != 9 && inCount != 8) ||
744 !allParametersPresent(inCount, 1)) {
745 return ANEURALNETWORKS_BAD_DATA;
746 }
747 const RunTimeOperandInfo& input = mOperands[ins[0]];
748 const RunTimeOperandInfo& filter = mOperands[ins[1]];
749 const RunTimeOperandInfo& bias = mOperands[ins[2]];
750
751 int32_t padding_left, padding_right;
752 int32_t padding_top, padding_bottom;
753 int32_t padding_implicit = 0;
754 int32_t stride_width, stride_height;
755 int32_t dilation_width_factor = 1, dilation_height_factor = 1;
756 int32_t depth_multiplier;
757 int32_t activation;
758 bool data_layout = false;
759 bool useImplicitPadding = false;
760
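// DEPTHWISE_CONV_2D has two parameter layouts: an implicit-padding form
// (8, 9, or 11 inputs, with a padding scheme at ins[3]) and an explicit-padding
// form (11, 12, or 14 inputs, with four padding amounts at ins[3..6]). The two
// 11-input forms are told apart by the type of ins[8]: a BOOL layout flag selects
// the implicit form, an INT32 stride selects the explicit form. The trailing
// optional inputs are the NCHW layout flag and the dilation factors.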
761 if ((inCount >= 9 && mOperands[ins[8]].type == OperandType::BOOL) || inCount == 8) {
762 padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
763 stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
764 stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
765 depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
766 activation = getScalarData<int32_t>(mOperands[ins[7]]);
767 if (inCount >= 9) {
768 data_layout = getScalarData<bool>(mOperands[ins[8]]);
769 }
770 if (inCount == 11) {
771 dilation_width_factor = getScalarData<int32_t>(mOperands[ins[9]]);
772 dilation_height_factor = getScalarData<int32_t>(mOperands[ins[10]]);
773 }
774 useImplicitPadding = true;
775 } else if (inCount >= 11 && mOperands[ins[8]].type == OperandType::INT32) {
776 padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
777 padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
778 padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
779 padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
780 stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
781 stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
782 depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
783 activation = getScalarData<int32_t>(mOperands[ins[10]]);
784 if (inCount >= 12) {
785 data_layout = getScalarData<bool>(mOperands[ins[11]]);
786 }
787 if (inCount == 14) {
788 dilation_width_factor = getScalarData<int32_t>(mOperands[ins[12]]);
789 dilation_height_factor = getScalarData<int32_t>(mOperands[ins[13]]);
790 }
791 } else {
792 return ANEURALNETWORKS_BAD_DATA;
793 }
794
795 RunTimeOperandInfo& output = mOperands[outs[0]];
796 Shape outShape = output.shape();
797
798 RunTimeOperandInfo input_tmp, output_tmp;
799 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
800 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
801 success = false;
802 break;
803 }
804 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
805 output_tmp.buffer = data_layout ? nullptr : output.buffer;
806 output_tmp.length = data_layout ? 0 : output.length;
807
808 if (useImplicitPadding) {
809 Shape inputShape = input_tmp.shape();
810 Shape filterShape = filter.shape();
811 int32_t input_width = getSizeOfDimension(inputShape, 2);
812 int32_t input_height = getSizeOfDimension(inputShape, 1);
813 int32_t filter_width = getSizeOfDimension(filterShape, 2);
814 int32_t filter_height = getSizeOfDimension(filterShape, 1);
815 calculateExplicitPadding(input_width, stride_width, dilation_width_factor,
816 filter_width, padding_implicit, &padding_left,
817 &padding_right);
818 calculateExplicitPadding(input_height, stride_height, dilation_height_factor,
819 filter_height, padding_implicit, &padding_top,
820 &padding_bottom);
821 }
822
823 if (!depthwiseConvPrepare(input_tmp.shape(), filter.shape(), bias.shape(), padding_left,
824 padding_right, padding_top, padding_bottom, stride_width,
825 stride_height, depth_multiplier, dilation_width_factor,
826 dilation_height_factor, &outShape) ||
827 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
828 if (!data_layout) output.dimensions = output_tmp.dimensions;
829 success = false;
830 break;
831 }
832 if (input_tmp.type == OperandType::TENSOR_FLOAT32) {
833 success = depthwiseConvFloat32(
834 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
835 reinterpret_cast<const float*>(filter.buffer), filter.shape(),
836 reinterpret_cast<const float*>(bias.buffer), bias.shape(), padding_left,
837 padding_right, padding_top, padding_bottom, stride_width, stride_height,
838 dilation_width_factor, dilation_height_factor, depth_multiplier, activation,
839 reinterpret_cast<float*>(output_tmp.buffer), outShape);
840 } else if (input_tmp.type == OperandType::TENSOR_FLOAT16) {
841 success = depthwiseConvFloat16(
842 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
843 reinterpret_cast<const _Float16*>(filter.buffer), filter.shape(),
844 reinterpret_cast<const _Float16*>(bias.buffer), bias.shape(), padding_left,
845 padding_right, padding_top, padding_bottom, stride_width, stride_height,
846 dilation_width_factor, dilation_height_factor, depth_multiplier, activation,
847 reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
848 } else if (input_tmp.type == OperandType::TENSOR_QUANT8_ASYMM) {
849 if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
850 success = depthwiseConvQuant8PerChannel(
851 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
852 reinterpret_cast<const int8_t*>(filter.buffer), filter.shape(),
853 filter.extraParams.channelQuant().scales.data(),
854 reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
855 padding_left, padding_right, padding_top, padding_bottom, stride_width,
856 stride_height, dilation_width_factor, dilation_height_factor,
857 depth_multiplier, activation,
858 reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
859 } else if (filter.type == OperandType::TENSOR_QUANT8_ASYMM) {
860 success = depthwiseConvQuant8(
861 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
862 reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(),
863 reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
864 padding_left, padding_right, padding_top, padding_bottom, stride_width,
865 stride_height, dilation_width_factor, dilation_height_factor,
866 depth_multiplier, activation,
867 reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
868 }
869 }
870 if (data_layout) {
871 output_tmp_guard.reset(output_tmp.buffer);
872 }
873 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
874 success = false;
875 break;
876 }
877 } break;
878 case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
879 const size_t inCount = ins.size();
880 if ((inCount != 6 && inCount != 5) || !allParametersPresent(inCount, 1)) {
881 return ANEURALNETWORKS_BAD_DATA;
882 }
883 const RunTimeOperandInfo& input = mOperands[ins[0]];
884 int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
885 float bias = (input.type == OperandType::TENSOR_FLOAT16)
886 ? getScalarData<_Float16>(mOperands[ins[2]])
887 : getScalarData<float>(mOperands[ins[2]]);
888 float alpha = (input.type == OperandType::TENSOR_FLOAT16)
889 ? getScalarData<_Float16>(mOperands[ins[3]])
890 : getScalarData<float>(mOperands[ins[3]]);
891 float beta = (input.type == OperandType::TENSOR_FLOAT16)
892 ? getScalarData<_Float16>(mOperands[ins[4]])
893 : getScalarData<float>(mOperands[ins[4]]);
894 const int32_t axis = inCount == 6 ? getScalarData<int32_t>(mOperands[ins[5]]) : -1;
895
896 RunTimeOperandInfo& output = mOperands[outs[0]];
897 Shape outShape = output.shape();
898
899 if (!genericNormalizationPrepare(input.shape(), &outShape) ||
900 !setInfoAndAllocateIfNeeded(&output, outShape, &result)) {
901 success = false;
902 break;
903 }
904 if (input.type == OperandType::TENSOR_FLOAT32) {
905 success = localResponseNormFloat32(
906 reinterpret_cast<const float*>(input.buffer), input.shape(), radius, bias,
907 alpha, beta, axis, reinterpret_cast<float*>(output.buffer), outShape);
908 } else if (input.type == OperandType::TENSOR_FLOAT16) {
909 success = localResponseNormFloat16(reinterpret_cast<const _Float16*>(input.buffer),
910 input.shape(), radius, bias, alpha, beta, axis,
911 reinterpret_cast<_Float16*>(output.buffer),
912 outShape);
913 }
914 } break;
915 case OperationType::RESHAPE: {
916 if (!allParametersPresent(2, 1)) {
917 return ANEURALNETWORKS_BAD_DATA;
918 }
919 const RunTimeOperandInfo& input = mOperands[ins[0]];
920 const RunTimeOperandInfo& targetShape = mOperands[ins[1]];
921
922 RunTimeOperandInfo& output = mOperands[outs[0]];
923 Shape outShape = output.shape();
924
925 success = reshapePrepare(input.shape(),
926 reinterpret_cast<const int32_t*>(targetShape.buffer),
927 getNumberOfElements(targetShape.shape()), &outShape) &&
928 setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
929 copyData(input.buffer, input.shape(), output.buffer, outShape);
930 } break;
931 case OperationType::DEPTH_TO_SPACE: {
932 const size_t inCount = ins.size();
933 if ((inCount != 3 && inCount != 2) || !allParametersPresent(inCount, 1)) {
934 return ANEURALNETWORKS_BAD_DATA;
935 }
936 const RunTimeOperandInfo& input = mOperands[ins[0]];
937 int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
938 bool data_layout = inCount == 3 ? getScalarData<bool>(mOperands[ins[2]]) : false;
939
940 RunTimeOperandInfo& output = mOperands[outs[0]];
941 Shape outShape = output.shape();
942
943 RunTimeOperandInfo input_tmp, output_tmp;
944 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
945 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
946 success = false;
947 break;
948 }
949 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
950 output_tmp.buffer = data_layout ? nullptr : output.buffer;
951 output_tmp.length = data_layout ? 0 : output.length;
952 if (!depthToSpacePrepare(input_tmp.shape(), blockSize, &outShape) ||
953 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
954 if (!data_layout) output.dimensions = output_tmp.dimensions;
955 break;
956 }
957 switch (input_tmp.type) {
958 case OperandType::TENSOR_FLOAT32: {
959 success = depthToSpaceGeneric(
960 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
961 blockSize, reinterpret_cast<float*>(output_tmp.buffer), outShape);
962 break;
963 }
964 case OperandType::TENSOR_FLOAT16: {
965 success = depthToSpaceGeneric(
966 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
967 blockSize, reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
968 break;
969 }
970 case OperandType::TENSOR_QUANT8_ASYMM: {
971 success = depthToSpaceGeneric(
972 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
973 blockSize, reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
974 break;
975 }
976 default: {
977 LOG(ERROR) << "Unsupported data type";
978 success = false;
979 }
980 }
981 if (data_layout) {
982 output_tmp_guard.reset(output_tmp.buffer);
983 }
984 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
985 success = false;
986 break;
987 }
988 } break;
989 case OperationType::SPACE_TO_DEPTH: {
990 const size_t inCount = ins.size();
991 if ((inCount != 3 && inCount != 2) || !allParametersPresent(inCount, 1)) {
992 return ANEURALNETWORKS_BAD_DATA;
993 }
994 const RunTimeOperandInfo& input = mOperands[ins[0]];
995 int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
996 bool data_layout = inCount == 3 ? getScalarData<bool>(mOperands[ins[2]]) : false;
997
998 RunTimeOperandInfo& output = mOperands[outs[0]];
999 Shape outShape = output.shape();
1000
1001 RunTimeOperandInfo input_tmp, output_tmp;
1002 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
1003 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
1004 success = false;
1005 break;
1006 }
1007 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
1008 output_tmp.buffer = data_layout ? nullptr : output.buffer;
1009 output_tmp.length = data_layout ? 0 : output.length;
1010
1011 if (!spaceToDepthPrepare(input_tmp.shape(), blockSize, &outShape) ||
1012 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
1013 if (!data_layout) output.dimensions = output_tmp.dimensions;
1014 break;
1015 }
1016 switch (input_tmp.type) {
1017 case OperandType::TENSOR_FLOAT32: {
1018 success = spaceToDepthGeneric(
1019 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
1020 blockSize, reinterpret_cast<float*>(output_tmp.buffer), outShape);
1021 break;
1022 }
1023 case OperandType::TENSOR_FLOAT16: {
1024 success = spaceToDepthGeneric(
1025 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
1026 blockSize, reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
1027 break;
1028 }
1029 case OperandType::TENSOR_QUANT8_ASYMM: {
1030 success = spaceToDepthGeneric(
1031 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
1032 blockSize, reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
1033 break;
1034 }
1035 default: {
1036 LOG(ERROR) << "Unsupported data type";
1037 success = false;
1038 }
1039 }
1040 if (data_layout) {
1041 output_tmp_guard.reset(output_tmp.buffer);
1042 }
1043 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
1044 success = false;
1045 break;
1046 }
1047 } break;
1048 case OperationType::EMBEDDING_LOOKUP: {
1049 const RunTimeOperandInfo& values = mOperands[ins[EmbeddingLookup::kValueTensor]];
1050 const RunTimeOperandInfo& lookups = mOperands[ins[EmbeddingLookup::kLookupTensor]];
1051 RunTimeOperandInfo& output = mOperands[outs[EmbeddingLookup::kOutputTensor]];
1052
1053 Shape outputShape;
1054 EmbeddingLookup lookup(operation, mOperands);
1055
1056 success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
1057 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && lookup.Eval();
1058 } break;
1059 case OperationType::HASHTABLE_LOOKUP: {
1060 const RunTimeOperandInfo& lookups = mOperands[ins[HashtableLookup::kLookupTensor]];
1061 const RunTimeOperandInfo& keys = mOperands[ins[HashtableLookup::kKeyTensor]];
1062 const RunTimeOperandInfo& values = mOperands[ins[HashtableLookup::kValueTensor]];
1063
1064 RunTimeOperandInfo& output = mOperands[outs[HashtableLookup::kOutputTensor]];
1065 RunTimeOperandInfo& hits = mOperands[outs[HashtableLookup::kHitsTensor]];
1066
1067 Shape outputShape, hitShape;
1068 HashtableLookup lookup(operation, mOperands);
1069
1070 success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
1071 &outputShape, &hitShape) &&
1072 setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
1073 setInfoAndAllocateIfNeeded(&hits, hitShape, &result) && lookup.Eval();
1074 } break;
1075 case OperationType::LSH_PROJECTION: {
1076 RunTimeOperandInfo& output = mOperands[outs[LSHProjection::kOutputTensor]];
1077 Shape outputShape;
1078 if (!LSHProjection::Prepare(operation, mOperands, &outputShape) ||
1079 !setInfoAndAllocateIfNeeded(&output, outputShape, &result)) {
1080 break;
1081 }
1082
1083 LSHProjection lsh(operation, mOperands);
1084 const RunTimeOperandInfo& hash = mOperands[ins[LSHProjection::kHashTensor]];
1085 switch (hash.type) {
1086 case OperandType::TENSOR_FLOAT32: {
1087 success = lsh.Eval<float>();
1088 break;
1089 }
1090 case OperandType::TENSOR_FLOAT16: {
1091 success = lsh.Eval<_Float16>();
1092 break;
1093 }
1094 default: {
1095 success = false;
1096 LOG(ERROR) << "Unsupported data type";
1097 }
1098 }
1099 } break;
1100 case OperationType::BIDIRECTIONAL_SEQUENCE_LSTM: {
1101 const auto merge_outputs = getScalarData<bool>(
1102 mOperands[ins[BidirectionalSequenceLSTM::kMergeOutputsParam]]);
1103 RunTimeOperandInfo& fwOutput =
1104 mOperands[outs[BidirectionalSequenceLSTM::kFwOutputTensor]];
1105 Shape fwOutputShape, bwOutputShape;
1106
1107 BidirectionalSequenceLSTM lstm(operation, mOperands);
1108 success = lstm.Prepare(operation, mOperands, &fwOutputShape, &bwOutputShape) &&
1109 setInfoAndAllocateIfNeeded(&fwOutput, fwOutputShape, &result);
1110 if (!merge_outputs) {
1111 RunTimeOperandInfo& bwOutput =
1112 mOperands[outs[BidirectionalSequenceLSTM::kBwOutputTensor]];
1113 success = success && setInfoAndAllocateIfNeeded(&bwOutput, bwOutputShape, &result);
1114 }
1115 success = success && lstm.Eval();
1116 } break;
1117 case OperationType::LSTM: {
1118 RunTimeOperandInfo& scratch = mOperands[outs[LSTMCell::kScratchBufferTensor]];
1119 RunTimeOperandInfo& outputStateOut = mOperands[outs[LSTMCell::kOutputStateOutTensor]];
1120 RunTimeOperandInfo& cellStateOut = mOperands[outs[LSTMCell::kCellStateOutTensor]];
1121 RunTimeOperandInfo& output = mOperands[outs[LSTMCell::kOutputTensor]];
1122
1123 Shape scratchShape, outputStateShape, cellStateShape, outputShape;
1124 LSTMCell lstm_cell(operation, mOperands);
1125
1126 success = lstm_cell.Prepare(operation, mOperands, &scratchShape, &outputStateShape,
1127 &cellStateShape, &outputShape) &&
1128 setInfoAndAllocateIfNeeded(&scratch, scratchShape, &result) &&
1129 setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape, &result) &&
1130 setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape, &result) &&
1131 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && lstm_cell.Eval();
1132 } break;
1133 case OperationType::RANDOM_MULTINOMIAL: {
1134 const RunTimeOperandInfo& lookups = mOperands[ins[HashtableLookup::kLookupTensor]];
1135 const RunTimeOperandInfo& keys = mOperands[ins[HashtableLookup::kKeyTensor]];
1136 const RunTimeOperandInfo& values = mOperands[ins[HashtableLookup::kValueTensor]];
1137 RunTimeOperandInfo& output = mOperands[outs[Multinomial::kOutputTensor]];
1138
1139 Shape outputShape;
1140 Multinomial multinomial(operation, mOperands);
1141
1142 success = Multinomial::Prepare(operation, mOperands, &outputShape) &&
1143 setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
1144 multinomial.Eval();
1145 } break;
1146 case OperationType::RNN: {
1147 RunTimeOperandInfo& hiddenStateOut = mOperands[outs[RNN::kHiddenStateOutTensor]];
1148 RunTimeOperandInfo& output = mOperands[outs[RNN::kOutputTensor]];
1149
1150 Shape hiddenStateShape, outputShape;
1151 RNN rnn_cell(operation, mOperands);
1152
1153 success = RNN::Prepare(operation, mOperands, &hiddenStateShape, &outputShape) &&
1154 setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape, &result) &&
1155 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && rnn_cell.Eval();
1156 } break;
1157 case OperationType::SVDF: {
1158 RunTimeOperandInfo& stateOut = mOperands[outs[SVDF::kStateOutTensor]];
1159 RunTimeOperandInfo& output = mOperands[outs[SVDF::kOutputTensor]];
1160
1161 Shape stateShape, outputShape;
1162 SVDF svdf(operation, mOperands);
1163
1164 success = SVDF::Prepare(operation, mOperands, &stateShape, &outputShape) &&
1165 setInfoAndAllocateIfNeeded(&stateOut, stateShape, &result) &&
1166 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && svdf.Eval();
1167 } break;
1168 case OperationType::BATCH_TO_SPACE_ND: {
1169 const size_t inCount = ins.size();
1170 if ((inCount != 3 && inCount != 2) || !allParametersPresent(inCount, 1)) {
1171 return ANEURALNETWORKS_BAD_DATA;
1172 }
1173 const RunTimeOperandInfo& input = mOperands[ins[0]];
1174 const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
1175 bool data_layout = inCount == 3 ? getScalarData<bool>(mOperands[ins[2]]) : false;
1176
1177 RunTimeOperandInfo& output = mOperands[outs[0]];
1178 Shape outShape = output.shape();
1179
1180 RunTimeOperandInfo input_tmp, output_tmp;
1181 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
1182 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
1183 success = false;
1184 break;
1185 }
1186 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
1187 output_tmp.buffer = data_layout ? nullptr : output.buffer;
1188 output_tmp.length = data_layout ? 0 : output.length;
1189
1190 if (!batchToSpacePrepare(input_tmp.shape(),
1191 reinterpret_cast<const int32_t*>(blockSize.buffer),
1192 blockSize.shape(), &outShape) ||
1193 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
1194 if (!data_layout) output.dimensions = output_tmp.dimensions;
1195 break;
1196 }
1197 switch (input_tmp.type) {
1198 case OperandType::TENSOR_FLOAT32: {
1199 success = batchToSpaceGeneric(
1200 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
1201 reinterpret_cast<const int32_t*>(blockSize.buffer),
1202 reinterpret_cast<float*>(output_tmp.buffer), outShape);
1203 break;
1204 }
1205 case OperandType::TENSOR_FLOAT16: {
1206 success = batchToSpaceGeneric(
1207 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
1208 reinterpret_cast<const int32_t*>(blockSize.buffer),
1209 reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
1210 break;
1211 }
1212 case OperandType::TENSOR_QUANT8_ASYMM: {
1213 success = batchToSpaceGeneric(
1214 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
1215 reinterpret_cast<const int32_t*>(blockSize.buffer),
1216 reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
1217 break;
1218 }
1219 default: {
1220 LOG(ERROR) << "Unsupported data type";
1221 success = false;
1222 }
1223 }
1224 if (data_layout) {
1225 output_tmp_guard.reset(output_tmp.buffer);
1226 }
1227 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
1228 success = false;
1229 break;
1230 }
1231 } break;
1232 case OperationType::SPACE_TO_BATCH_ND: {
1233 const size_t inCount = ins.size();
1234 if ((inCount != 4 && inCount != 3) || !allParametersPresent(inCount, 1)) {
1235 return ANEURALNETWORKS_BAD_DATA;
1236 }
1237 const RunTimeOperandInfo& input = mOperands[ins[0]];
1238 const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
1239 const RunTimeOperandInfo& paddings = mOperands[ins[2]];
1240 bool data_layout = inCount == 4 ? getScalarData<bool>(mOperands[ins[3]]) : false;
1241
1242 RunTimeOperandInfo& output = mOperands[outs[0]];
1243 Shape outShape = output.shape();
1244
1245 RunTimeOperandInfo input_tmp, output_tmp;
1246 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
1247 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
1248 success = false;
1249 break;
1250 }
1251 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
1252 output_tmp.buffer = data_layout ? nullptr : output.buffer;
1253 output_tmp.length = data_layout ? 0 : output.length;
1254
1255 if (!spaceToBatchPrepare(
1256 input_tmp.shape(), reinterpret_cast<const int32_t*>(blockSize.buffer),
1257 blockSize.shape(), reinterpret_cast<const int32_t*>(paddings.buffer),
1258 paddings.shape(), &outShape) ||
1259 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
1260 if (!data_layout) output.dimensions = output_tmp.dimensions;
1261 break;
1262 }
1263 switch (input_tmp.type) {
1264 case OperandType::TENSOR_FLOAT32: {
1265 success = spaceToBatchGeneric(
1266 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
1267 reinterpret_cast<const int32_t*>(blockSize.buffer),
1268 reinterpret_cast<const int32_t*>(paddings.buffer), paddings.shape(),
1269 reinterpret_cast<float*>(output_tmp.buffer), outShape);
1270 break;
1271 }
1272 case OperandType::TENSOR_FLOAT16: {
1273 success = spaceToBatchGeneric(
1274 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
1275 reinterpret_cast<const int32_t*>(blockSize.buffer),
1276 reinterpret_cast<const int32_t*>(paddings.buffer), paddings.shape(),
1277 reinterpret_cast<_Float16*>(output_tmp.buffer), outShape);
1278 break;
1279 }
1280 case OperandType::TENSOR_QUANT8_ASYMM: {
1281 success = spaceToBatchGeneric(
1282 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
1283 reinterpret_cast<const int32_t*>(blockSize.buffer),
1284 reinterpret_cast<const int32_t*>(paddings.buffer), paddings.shape(),
1285 reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
1286 break;
1287 }
1288 default: {
1289 LOG(ERROR) << "Unsupported data type";
1290 success = false;
1291 }
1292 }
1293 if (data_layout) {
1294 output_tmp_guard.reset(output_tmp.buffer);
1295 }
1296 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
1297 success = false;
1298 break;
1299 }
1300 } break;
1301 case OperationType::PAD:
1302 case OperationType::PAD_V2: {
1303 const bool isV2 = operation.type == OperationType::PAD_V2;
1304 if (!allParametersPresent(isV2 ? 3 : 2, 1)) {
1305 return ANEURALNETWORKS_BAD_DATA;
1306 }
1307 const RunTimeOperandInfo& input = mOperands[ins[0]];
1308 const RunTimeOperandInfo& paddings = mOperands[ins[1]];
1309
1310 RunTimeOperandInfo& output = mOperands[outs[0]];
1311 Shape outShape = output.shape();
1312
1313 if (!padPrepare(input.shape(), reinterpret_cast<const int32_t*>(paddings.buffer),
1314 paddings.shape(), &outShape) ||
1315 !setInfoAndAllocateIfNeeded(&output, outShape, &result)) {
1316 break;
1317 }
1318 if (input.type == OperandType::TENSOR_FLOAT32) {
1319 float pad_value = isV2 ? getScalarData<float>(mOperands[ins[2]]) : 0;
1320 success = padGeneric(reinterpret_cast<const float*>(input.buffer), input.shape(),
1321 reinterpret_cast<const int32_t*>(paddings.buffer), pad_value,
1322 reinterpret_cast<float*>(output.buffer), outShape);
1323 } else if (input.type == OperandType::TENSOR_FLOAT16) {
1324 _Float16 pad_value = isV2 ? getScalarData<_Float16>(mOperands[ins[2]]) : 0;
1325 success = padGeneric(reinterpret_cast<const _Float16*>(input.buffer), input.shape(),
1326 reinterpret_cast<const int32_t*>(paddings.buffer),
1327 static_cast<_Float16>(pad_value),
1328 reinterpret_cast<_Float16*>(output.buffer), outShape);
1329 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
1330 uint8_t pad_value =
1331 isV2 ? getScalarData<uint8_t>(mOperands[ins[2]]) : outShape.offset;
1332 success = padGeneric(input.buffer, input.shape(),
1333 reinterpret_cast<const int32_t*>(paddings.buffer), pad_value,
1334 output.buffer, outShape);
1335 }
1336 } break;
1337 case OperationType::CAST: {
1338 if (!allParametersPresent(1, 1)) {
1339 return ANEURALNETWORKS_BAD_DATA;
1340 }
1341 const RunTimeOperandInfo& input = mOperands[ins[0]];
1342
1343 RunTimeOperandInfo& output = mOperands[outs[0]];
1344 Shape outShape = output.shape();
1345
1346 success = cast::prepare(input.shape(), &outShape) &&
1347 setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
1348 cast::eval(input.buffer, input.shape(), output.buffer, outShape);
1349 } break;
1350 case OperationType::SQUEEZE: {
1351 if (ins.size() != 2 || outs.size() != 1 ||
1352 mOperands[ins[0]].lifetime == OperandLifeTime::NO_VALUE ||
1353 mOperands[outs[0]].lifetime == OperandLifeTime::NO_VALUE) {
1354 LOG(ERROR) << "Wrong input/output count or lifetime for SQUEEZE op.";
1355 return ANEURALNETWORKS_BAD_DATA;
1356 }
1357 const RunTimeOperandInfo& input = mOperands[ins[0]];
1358 const RunTimeOperandInfo& squeezeDims = mOperands[ins[1]];
1359
1360 RunTimeOperandInfo& output = mOperands[outs[0]];
1361 Shape outShape = output.shape();
1362
1363 success = squeezePrepare(input.shape(),
1364 reinterpret_cast<const int32_t*>(squeezeDims.buffer),
1365 squeezeDims.shape(), &outShape) &&
1366 setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
1367 copyData(input.buffer, input.shape(), output.buffer, outShape);
1368 } break;
        case OperationType::STRIDED_SLICE: {
            if (!allParametersPresent(7, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& begins = mOperands[ins[1]];
            const RunTimeOperandInfo& ends = mOperands[ins[2]];
            const RunTimeOperandInfo& strides = mOperands[ins[3]];
            int32_t beginMask = getScalarData<int32_t>(mOperands[ins[4]]);
            int32_t endMask = getScalarData<int32_t>(mOperands[ins[5]]);
            int32_t shrinkAxisMask = getScalarData<int32_t>(mOperands[ins[6]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success =
                    stridedSlicePrepare(
                            input.shape(), reinterpret_cast<const int32_t*>(begins.buffer),
                            begins.shape(), reinterpret_cast<const int32_t*>(ends.buffer),
                            ends.shape(), reinterpret_cast<const int32_t*>(strides.buffer),
                            strides.shape(), beginMask, endMask, shrinkAxisMask, &outShape) &&
                    setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                    stridedSliceGeneric(input.buffer, input.shape(),
                                        reinterpret_cast<const int32_t*>(begins.buffer),
                                        reinterpret_cast<const int32_t*>(ends.buffer),
                                        reinterpret_cast<const int32_t*>(strides.buffer), beginMask,
                                        endMask, shrinkAxisMask, output.buffer, outShape);
        } break;
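        // MEAN reduces the input along the axes given in ins[1]; keepDims > 0 keeps the reduced
        // dimensions with size 1.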
        case OperationType::MEAN: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& axis = mOperands[ins[1]];
            int32_t keepDims = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (!meanPrepare(input.shape(), reinterpret_cast<const int32_t*>(axis.buffer),
                             axis.shape(), keepDims > 0, &outShape) ||
                !setInfoAndAllocateIfNeeded(&output, outShape, &result)) {
                break;
            }
            if (input.type == OperandType::TENSOR_FLOAT16) {
                success = meanFloat16(reinterpret_cast<_Float16*>(input.buffer), input.shape(),
                                      reinterpret_cast<const int32_t*>(axis.buffer), axis.shape(),
                                      keepDims > 0, reinterpret_cast<_Float16*>(output.buffer),
                                      outShape);
            } else if (input.type == OperandType::TENSOR_FLOAT32) {
                success = meanGeneric<float, float>(
                        reinterpret_cast<float*>(input.buffer), input.shape(),
                        reinterpret_cast<const int32_t*>(axis.buffer), axis.shape(), keepDims > 0,
                        reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = meanGeneric<uint8_t, int32_t>(
                        reinterpret_cast<uint8_t*>(input.buffer), input.shape(),
                        reinterpret_cast<const int32_t*>(axis.buffer), axis.shape(), keepDims > 0,
                        reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
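        // ARGMAX/ARGMIN return, along the given axis, the index of the largest or smallest
        // element respectively.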
        case OperationType::ARGMAX:
        case OperationType::ARGMIN: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            const bool isArgMin = operation.type == OperationType::ARGMIN;
            success = argMinMaxPrepare(input.shape(), axis, &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                      argMinMaxGeneric(input.buffer, input.shape(), axis, isArgMin, output.buffer,
                                       outShape);
        } break;
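        // EXPAND_DIMS inserts a dimension of size 1 at the given axis; the underlying data is
        // unchanged.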
        case OperationType::EXPAND_DIMS: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = expand_dims::prepare(input.shape(), axis, &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                      expand_dims::eval(input.buffer, input.shape(), axis, output.buffer, outShape);
        } break;
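        // SPLIT slices the input into numOutputs equal parts along the given axis, writing one
        // part into each output operand.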
        case OperationType::SPLIT: {
            if (ins.size() != 3) {
                LOG(ERROR) << "Wrong input count";
                return ANEURALNETWORKS_BAD_DATA;
            }

            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]);
            const int32_t numOutputs = getScalarData<int32_t>(mOperands[ins[2]]);

            if (numOutputs != outs.size()) {
                return ANEURALNETWORKS_BAD_DATA;
            }

            std::vector<Shape> outputShapes(numOutputs);
            for (int i = 0; i < numOutputs; ++i) {
                outputShapes[i] = mOperands[outs[i]].shape();
            }

            success = splitPrepare(input.shape(), axis, numOutputs, &outputShapes);
            for (int i = 0; i < numOutputs; ++i) {
                success = success && setInfoAndAllocateIfNeeded(&(mOperands[outs[i]]),
                                                                outputShapes[i], &result);
            }
            switch (input.type) {
                case OperandType::TENSOR_FLOAT16: {
                    std::vector<_Float16*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<_Float16*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitFloat16(reinterpret_cast<const _Float16*>(input.buffer),
                                           input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                case OperandType::TENSOR_FLOAT32: {
                    std::vector<float*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<float*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitFloat32(reinterpret_cast<const float*>(input.buffer),
                                           input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                case OperandType::TENSOR_INT32: {
                    std::vector<int32_t*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<int32_t*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitInt32(reinterpret_cast<const int32_t*>(input.buffer),
                                         input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                case OperandType::TENSOR_QUANT8_ASYMM: {
                    std::vector<uint8_t*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<uint8_t*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                          input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                default: {
                    return ANEURALNETWORKS_BAD_DATA;
                }
            }
        } break;
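        // MAXIMUM/MINIMUM compute the element-wise max or min of two broadcast-compatible
        // tensors.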
        case OperationType::MAXIMUM:
        case OperationType::MINIMUM: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outputShape = output.shape();

            const bool isMinimum = operation.type == OperationType::MINIMUM;
            success = maximum_minimum::prepare(in1.shape(), in2.shape(), &outputShape) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
                      maximum_minimum::eval(in1.buffer, in1.shape(), in2.buffer, in2.shape(),
                                            isMinimum, output.buffer, outputShape);
        } break;
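        // GROUPED_CONV_2D accepts either explicit padding (12 inputs) or implicit padding
        // (9 inputs). When data_layout indicates NCHW, the input is converted to NHWC before the
        // kernels run and the result is converted back afterwards.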
        case OperationType::GROUPED_CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 12 && inCount != 9) || !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t padding_implicit = 0;
            int32_t stride_width, stride_height;
            int32_t numGroups;
            int32_t activation;
            bool data_layout = false;

            if (inCount == 12) {
                padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
                numGroups = getScalarData<int32_t>(mOperands[ins[9]]);
                activation = getScalarData<int32_t>(mOperands[ins[10]]);
                data_layout = getScalarData<bool>(mOperands[ins[11]]);
            } else {
                padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
                numGroups = getScalarData<int32_t>(mOperands[ins[6]]);
                activation = getScalarData<int32_t>(mOperands[ins[7]]);
                data_layout = getScalarData<bool>(mOperands[ins[8]]);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            RunTimeOperandInfo input_tmp, output_tmp;
            std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
            if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
                success = false;
                break;
            }
            output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
            output_tmp.buffer = data_layout ? nullptr : output.buffer;
            output_tmp.length = data_layout ? 0 : output.length;

            if (inCount == 9) {
                Shape inputShape = input_tmp.shape();
                Shape filterShape = filter.shape();
                int32_t input_width = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height, filter_height,
                                         padding_implicit, &padding_top, &padding_bottom);
            }

            if (!groupedConvPrepare(input_tmp.shape(), filter.shape(), bias.shape(), padding_left,
                                    padding_right, padding_top, padding_bottom, stride_width,
                                    stride_height, numGroups, &outShape) ||
                !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
                if (!data_layout) output.dimensions = output_tmp.dimensions;
                success = false;
                break;
            }

            if (input_tmp.type == OperandType::TENSOR_FLOAT32) {
                success = groupedConvFloat32(
                        reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
                        reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                        reinterpret_cast<const float*>(bias.buffer), bias.shape(), padding_left,
                        padding_right, padding_top, padding_bottom, stride_width, stride_height,
                        numGroups, activation, reinterpret_cast<float*>(output_tmp.buffer),
                        outShape);
            } else if (input_tmp.type == OperandType::TENSOR_FLOAT16) {
                success = groupedConvFloat16(
                        reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
                        reinterpret_cast<const _Float16*>(filter.buffer), filter.shape(),
                        reinterpret_cast<const _Float16*>(bias.buffer), bias.shape(), padding_left,
                        padding_right, padding_top, padding_bottom, stride_width, stride_height,
                        numGroups, activation, reinterpret_cast<_Float16*>(output_tmp.buffer),
                        outShape);
            } else if (input_tmp.type == OperandType::TENSOR_QUANT8_ASYMM) {
                if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
                    success = groupedConvQuant8PerChannel(
                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const int8_t*>(filter.buffer), filter.shape(),
                            filter.extraParams.channelQuant().scales.data(),
                            reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
                            padding_left, padding_right, padding_top, padding_bottom, stride_width,
                            stride_height, numGroups, activation,
                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
                } else if (filter.type == OperandType::TENSOR_QUANT8_ASYMM) {
                    success = groupedConvQuant8(
                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(),
                            reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
                            padding_left, padding_right, padding_top, padding_bottom, stride_width,
                            stride_height, numGroups, activation,
                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
                }
            }

            if (data_layout) {
                output_tmp_guard.reset(output_tmp.buffer);
            }
            if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
                success = false;
                break;
            }
        } break;
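        // TILE replicates the input along each dimension by the factor given in the multiples
        // tensor.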
        case OperationType::TILE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& multiples = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success =
                    tile::prepare(input.shape(), reinterpret_cast<const int32_t*>(multiples.buffer),
                                  multiples.shape(), &outShape) &&
                    setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                    tile::eval(input.buffer, input.shape(),
                               reinterpret_cast<const int32_t*>(multiples.buffer), output.buffer,
                               outShape);
        } break;
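        // QUANTIZED_16BIT_LSTM runs one step of a quantized LSTM cell, producing both the updated
        // cell state and the output tensor.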
        case OperationType::QUANTIZED_16BIT_LSTM: {
            if (!allParametersPresent(15, 2)) {
                return ANEURALNETWORKS_BAD_DATA;
            }

            RunTimeOperandInfo& cellStateOut =
                    mOperands[outs[QuantizedLSTMCell::kCellStateOutTensor]];
            RunTimeOperandInfo& output = mOperands[outs[QuantizedLSTMCell::kOutputTensor]];

            Shape cellStateOutShape, outputShape;
            QuantizedLSTMCell quantizedLSTMCell(operation, mOperands);

            success = QuantizedLSTMCell::prepare(operation, mOperands, &cellStateOutShape,
                                                 &outputShape) &&
                      setInfoAndAllocateIfNeeded(&cellStateOut, cellStateOutShape, &result) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
                      quantizedLSTMCell.eval();
        } break;
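        // POW raises each element of the base tensor to the corresponding (broadcast) exponent.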
        case OperationType::POW: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& base = mOperands[ins[0]];
            const RunTimeOperandInfo& exponent = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = pow::prepare(base.shape(), exponent.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                      pow::eval(base.buffer, base.shape(), exponent.buffer, exponent.shape(),
                                output.buffer, outShape);
        } break;
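        // TOPK_V2 returns the k largest elements along the last dimension, together with their
        // indices.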
        case OperationType::TOPK_V2: {
            if (!allParametersPresent(2, 2)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t k = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& values = mOperands[outs[0]];
            Shape valuesShape = values.shape();
            RunTimeOperandInfo& indices = mOperands[outs[1]];
            Shape indicesShape = indices.shape();

            success = topk_v2::prepare(input.shape(), k, &valuesShape, &indicesShape) &&
                      setInfoAndAllocateIfNeeded(&values, valuesShape, &result) &&
                      setInfoAndAllocateIfNeeded(&indices, indicesShape, &result) &&
                      topk_v2::eval(input.buffer, input.shape(), k, values.buffer, valuesShape,
                                    indices.buffer, indicesShape);
        } break;
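        // Any operation not handled above is dispatched through its OperationResolver
        // registration, which supplies the prepare and execute functions.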
        default: {
            const OperationRegistration* operationRegistration =
                    mOperationResolver->findOperation(operation.type);
            if (operationRegistration == nullptr) {
                LOG(ERROR) << getOperationName(operation.type) << " not registered";
            } else if (operationRegistration->prepare == nullptr ||
                       operationRegistration->execute == nullptr) {
                LOG(ERROR) << "Incomplete operation registration: "
                           << getOperationName(operation.type);
            } else {
                OperationExecutionContext context(&operation, mOperands.data());
                success = operationRegistration->flags.allowOmittedOperand ||
                          context.checkNoOmittedOperand();
                success = success && (operationRegistration->flags.allowZeroSizedInput ||
                                      context.checkNoZeroSizedInput());
                success = success && operationRegistration->prepare(&context) &&
                          operationRegistration->execute(&context);
                result = context.getResultCode();
            }
        }
    }
    if (!success && result == ANEURALNETWORKS_NO_ERROR) {
        result = ANEURALNETWORKS_OP_FAILED;
    }
    if (result != ANEURALNETWORKS_NO_ERROR) {
        LOG(ERROR) << getOperationName(operation.type) << " failed.";
        return result;
    }

    freeNoLongerUsedOperands(ins);
    return ANEURALNETWORKS_NO_ERROR;
}

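// Called when an execution finishes (successfully or not): releases temporary operand buffers,
// records the output shapes to report back to the caller, and marks the executor as finished.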
void CpuExecutor::finish(int result) {
    // Free allocated temporary operands.
    for (auto& info : mOperands) {
        if (info.lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info.buffer != nullptr) {
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }

    // Only report the output shapes when the result code is NO_ERROR or
    // OUTPUT_INSUFFICIENT_SIZE.
    if (result == ANEURALNETWORKS_NO_ERROR || result == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
        const auto& outputs = mModel->outputIndexes;
        mOutputShapes.resize(outputs.size());
        for (uint32_t i = 0; i < outputs.size(); i++) {
            const uint32_t operandIndex = outputs[i];
            RunTimeOperandInfo& from = mOperands[operandIndex];
            mOutputShapes[i].dimensions = from.dimensions;
            mOutputShapes[i].isSufficient = from.isSufficient();
        }
    } else {
        mOutputShapes.clear();
    }

    mModel = nullptr;
    mRequest = nullptr;
    mFinished = true;
}

// b/109953668, disable OpenMP
#ifdef NNAPI_OPENMP
ScopedOpenmpSettings::ScopedOpenmpSettings() {
    mBlocktimeInitial = kmp_get_blocktime();
    kmp_set_blocktime(20);  // ms, see b/109645291

#if NNAPI_LIMIT_CPU_THREADS
    // Code not yet enabled. Choosing the number of threads to be based on
    // benchmarking. See longer comment by the class declaration.
    mMaxThreadsInitial = Eigen::nbThreads();
    const int nProcs = omp_get_num_procs();
    int threads = nProcs;
    if (nProcs >= 8) {
        threads = nProcs - 4;
    } else if (nProcs >= 4) {
        threads = nProcs - 2;
    }
    Eigen::setNbThreads(threads);
#endif
}

ScopedOpenmpSettings::~ScopedOpenmpSettings() {
    kmp_set_blocktime(mBlocktimeInitial);
#if NNAPI_LIMIT_CPU_THREADS
    Eigen::setNbThreads(mMaxThreadsInitial);
#endif
}
#endif  // NNAPI_OPENMP

} // namespace nn
} // namespace android