/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "CpuExecutor"

#include "CpuExecutor.h"

#include "NeuralNetworks.h"
#include "Operations.h"

#include <sys/mman.h>

#include <new>  // std::nothrow, used when allocating temporary buffers

namespace android {
namespace nn {

// TODO: short term, make shared memory mapping and updating a utility function.
// TODO: long term, implement mmap_fd as a hidl IMemory service.
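// Maps the memory described by |hidlMemory| and points |buffer| at it.
// "ashmem" pools go through the IMemory interface; "mmap_fd" pools are
// mapped directly with mmap(). Returns false if the memory cannot be mapped.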
bool RunTimePoolInfo::set(const hidl_memory& hidlMemory) {
    this->hidlMemory = hidlMemory;
    auto memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory = mapMemory(hidlMemory);
        if (memory == nullptr) {
            LOG(ERROR) << "Can't map shared memory.";
            return false;
        }
        memory->update();
        buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
        if (buffer == nullptr) {
            LOG(ERROR) << "Can't access shared memory.";
            return false;
        }
        return true;
    } else if (memType == "mmap_fd") {
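        // The native handle of an "mmap_fd" pool packs the mapping parameters
        // into data[]: the file descriptor, the mmap protection flags, and a
        // 64-bit offset split across two 32-bit ints that getSizeFromInts()
        // recombines.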
        size_t size = hidlMemory.size();
        int fd = hidlMemory.handle()->data[0];
        int prot = hidlMemory.handle()->data[1];
        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2],
                                        hidlMemory.handle()->data[3]);
        buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
        if (buffer == MAP_FAILED) {
            LOG(ERROR) << "Can't mmap the file descriptor.";
            return false;
        }
        return true;
    } else {
        LOG(ERROR) << "unsupported hidl_memory type";
        return false;
    }
}

// Makes sure the output data are correctly written back to the underlying memory
// after execution.
bool RunTimePoolInfo::update() {
    auto memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory->commit();
        return true;
    } else if (memType == "mmap_fd") {
        int prot = hidlMemory.handle()->data[1];
        if (prot & PROT_WRITE) {
            size_t size = hidlMemory.size();
            return msync(buffer, size, MS_SYNC) == 0;
        }
    }
    // No-op for other types of memory.
    return true;
}

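// Maps every pool in |pools| into an entry of |poolInfos|. Fails (and returns
// false) as soon as any pool cannot be mapped.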
bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools) {
    poolInfos->resize(pools.size());
    for (size_t i = 0; i < pools.size(); i++) {
        auto& poolInfo = (*poolInfos)[i];
        if (!poolInfo.set(pools[i])) {
            LOG(ERROR) << "Could not map pool";
            return false;
        }
    }
    return true;
}

// Updates the RunTimeOperandInfo with the newly calculated shape.
// Allocates the buffer if we need to.
static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
    // For user-provided model output operands, the parameters must match the Shape
    // calculated from the preparation step.
    if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
        if (info->type != shape.type ||
            info->dimensions != shape.dimensions) {
            LOG(ERROR) << "Invalid type or dimensions for model output";
            return false;
        }
        if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
            (info->scale != shape.scale || info->zeroPoint != shape.offset)) {
            LOG(ERROR) << "Invalid scale or zeroPoint for model output";
            return false;
        }
    }
    info->type = shape.type;
    info->dimensions = shape.dimensions;
    info->scale = shape.scale;
    info->zeroPoint = shape.offset;
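    // Only temporaries are allocated here; constants and model inputs/outputs
    // already have backing memory by the time an operation runs. The buffer is
    // released by freeNoLongerUsedOperands() once its last consumer has executed.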
    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
        uint32_t length = sizeOfData(info->type, info->dimensions);
        // Use the nothrow form so an allocation failure reaches the null check
        // below instead of throwing.
        info->buffer = new (std::nothrow) uint8_t[length];
        if (info->buffer == nullptr) {
            return false;
        }
    }
    return true;
}

// Ignore the .pools entry in model and request.  This will have been taken care of
// by the caller.
int CpuExecutor::run(const Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::run()";
    // VLOG(CPUEXE) << "model: " << toString(model);
    VLOG(CPUEXE) << "request: " << toString(request);

    mModel = &model;
    mRequest = &request; // TODO check if mRequest is needed
    initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
    // The model has serialized the operations in execution order.
    for (const auto& operation : model.operations) {
        int n = executeOperation(operation);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return n;
        }
    }
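    // Sync the pools back to their backing memory (commit() for ashmem,
    // msync() for writable mmap_fd pools) so the results become visible to
    // the caller.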
    for (auto runtimeInfo : modelPoolInfos) {
        runtimeInfo.update();
    }
    for (auto runtimeInfo : requestPoolInfos) {
        runtimeInfo.update();
    }
    mModel = nullptr;
    mRequest = nullptr;
    VLOG(CPUEXE) << "Completed run normally";
    return ANEURALNETWORKS_NO_ERROR;
}

bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                                        const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
    const size_t count = mModel->operands.size();
    mOperands.resize(count);

    // Start by setting the runtime info to what's in the model.
    for (size_t i = 0; i < count; i++) {
        const Operand& from = mModel->operands[i];
        RunTimeOperandInfo& to = mOperands[i];
        to.type = from.type;
        to.dimensions = from.dimensions;
        to.scale = from.scale;
        to.zeroPoint = from.zeroPoint;
        to.length = from.location.length;
        to.lifetime = from.lifetime;
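        // Where the operand's data lives depends on its lifetime: temporaries
        // are allocated lazily during execution, constants point into
        // operandValues or a model pool, and model inputs/outputs are bound
        // below from the request arguments.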
        switch (from.lifetime) {
            case OperandLifeTime::TEMPORARY_VARIABLE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = from.numberOfConsumers;
                break;
            case OperandLifeTime::CONSTANT_COPY:
                to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
                to.numberOfUsesLeft = 0;
                break;
            case OperandLifeTime::CONSTANT_REFERENCE: {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < modelPoolInfos.size());
                auto& r = modelPoolInfos[poolIndex];
                to.buffer = r.buffer + from.location.offset;
                to.numberOfUsesLeft = 0;
                break;
            }
            case OperandLifeTime::MODEL_INPUT:
            case OperandLifeTime::MODEL_OUTPUT:
            case OperandLifeTime::NO_VALUE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = 0;
                break;
            default:
                nnAssert(false);
                break;
        }
    }

    // Adjust the runtime info for the arguments passed to the model,
    // modifying the buffer location, and possibly the dimensions.
    auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
                                  const hidl_vec<RequestArgument>& arguments) {
        nnAssert(indexes.size() == arguments.size());
        for (size_t i = 0; i < indexes.size(); i++) {
            const uint32_t operandIndex = indexes[i];
            const RequestArgument& from = arguments[i];
            RunTimeOperandInfo& to = mOperands[operandIndex];
            if (from.dimensions.size() > 0) {
                // It's the responsibility of the caller to validate that
                // from.dimensions only modifies the dimensions that were
                // unspecified in the model.  That's the case in SampleDriver.cpp
                // with the call to validateRequest().
                // TODO make sure that's the case for the default CPU path.
                to.dimensions = from.dimensions;
            }
            if (from.hasNoValue) {
                to.lifetime = OperandLifeTime::NO_VALUE;
                nnAssert(to.buffer == nullptr);
            } else {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < requestPoolInfos.size());
                auto& r = requestPoolInfos[poolIndex];
                to.buffer = r.buffer + from.location.offset;
            }
        }
    };
    updateForArguments(mModel->inputIndexes, mRequest->inputs);
    updateForArguments(mModel->outputIndexes, mRequest->outputs);

    return true;
}

void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
    for (uint32_t i : inputs) {
        auto& info = mOperands[i];
        // numberOfUsesLeft is already 0 for constants and model inputs/outputs,
        // which the executor does not own and must not free here.
        if (info.numberOfUsesLeft == 0) {
            continue;
        }
        info.numberOfUsesLeft--;
        if (info.numberOfUsesLeft == 0) {
            nnAssert(info.buffer != nullptr);
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }
}

int CpuExecutor::executeOperation(const Operation& operation) {
    // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
    const hidl_vec<uint32_t>& ins = operation.inputs;
    const hidl_vec<uint32_t>& outs = operation.outputs;
    bool success = false;

    // Function to verify that the number of input and output parameters
    // matches what is expected.  Also checks that all the parameters have
    // values. This function is to be used only for operations that do not
    // accept optional arguments.
    // TODO Have a version that works for optional arguments.
    auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
                                                                size_t requiredOuts) -> bool {
        auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
                          const char* type) -> bool {
            size_t actualCount = indexes.size();
            if (actualCount != requiredCount) {
                LOG(ERROR) << getOperationName(operation.type)
                           << ": Invalid number of " << type << " operands. Got " << actualCount
                           << ", expected " << requiredCount;
                return false;
            }
            for (size_t i = 0; i < actualCount; i++) {
                if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
                    LOG(ERROR) << getOperationName(operation.type) << " " << type
                               << " operand " << i << " is required but missing.";
                    return false;
                }
            }
            return true;
        };
        return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
    };

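    // Dispatch on the operation type. Each case validates its operand counts,
    // runs the matching *Prepare() function to compute the output Shape,
    // allocates the output with setInfoAndAllocateIfNeeded(), and then invokes
    // the kernel from Operations.h that matches the input's data type.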
    switch (operation.type) {
        case OperationType::OEM_OPERATION: {
            LOG(ERROR) << "OEM operation not supported for CPU execution";
            success = false;
        } break;
        case OperationType::ADD: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::MUL: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::FLOOR: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = floorPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          floorFloat32(reinterpret_cast<const float*>(input.buffer),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            }
        } break;
        case OperationType::DEQUANTIZE: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = dequantizePrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          dequantizeQuant8ToFloat32(
                                  reinterpret_cast<const uint8_t*>(input.buffer),
                                  reinterpret_cast<float*>(output.buffer),
                                  input.shape());
            }
        } break;
        case OperationType::DEPTHWISE_CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 11 && inCount != 8) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input  = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias   = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t depth_multiplier;
            int32_t activation;

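            // Two signatures: with 11 inputs the four padding values are given
            // explicitly; with 8 inputs only an implicit padding code is given,
            // and calculateExplicitPadding() expands it from the input and
            // filter sizes (tensors are NHWC, so dimension 1 is the height and
            // dimension 2 the width).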
            if (inCount == 11) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
                activation       = getScalarData<int32_t>(mOperands[ins[10]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
                activation       = getScalarData<int32_t>(mOperands[ins[7]]);

                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer),
                                               input.shape(),
                                               reinterpret_cast<const float*>(filter.buffer),
                                               filter.shape(),
                                               reinterpret_cast<const float*>(bias.buffer),
                                               bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               depth_multiplier, activation,
                                               reinterpret_cast<float*>(output.buffer),
                                               outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                              input.shape(),
                                              reinterpret_cast<const uint8_t*>(filter.buffer),
                                              filter.shape(),
                                              reinterpret_cast<const int32_t*>(bias.buffer),
                                              bias.shape(),
                                              padding_left, padding_right,
                                              padding_top, padding_bottom,
                                              stride_width, stride_height,
                                              depth_multiplier, activation,
                                              reinterpret_cast<uint8_t*>(output.buffer),
                                              outShape);
            }
        } break;
        case OperationType::CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input  = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias   = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t activation;

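            // Same dual signature as DEPTHWISE_CONV_2D: 10 inputs carry
            // explicit padding, 7 inputs carry an implicit padding code that
            // is expanded below.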
            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height,
                                      &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                      reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                                      reinterpret_cast<const float*>(bias.buffer), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height, activation,
                                      reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height,
                                      &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<const uint8_t*>(filter.buffer),
                                     filter.shape(),
                                     reinterpret_cast<const int32_t*>(bias.buffer),
                                     bias.shape(),
                                     padding_left, padding_right,
                                     padding_top, padding_bottom,
                                     stride_width, stride_height, activation,
                                     reinterpret_cast<uint8_t*>(output.buffer),
                                     outShape);
            }
        } break;
        case OperationType::AVERAGE_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

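            // The pooling ops reuse the explicit (10-input) / implicit
            // (7-input) padding scheme of the convolutions; filter_width and
            // filter_height here are the size of the pooling window.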
            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          averagePoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                             input.shape(),
                                             padding_left, padding_right,
                                             padding_top, padding_bottom,
                                             stride_width, stride_height,
                                             filter_width, filter_height, activation,
                                             reinterpret_cast<float*>(output.buffer),
                                             outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                            input.shape(),
                                            padding_left, padding_right,
                                            padding_top, padding_bottom,
                                            stride_width, stride_height,
                                            filter_width, filter_height, activation,
                                            reinterpret_cast<uint8_t*>(output.buffer),
                                            outShape);
            }
        } break;
        case OperationType::L2_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2PoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                        input.shape(),
                                        padding_left, padding_right,
                                        padding_top, padding_bottom,
                                        stride_width, stride_height,
                                        filter_width, filter_height, activation,
                                        reinterpret_cast<float*>(output.buffer),
                                        outShape);
            }
        } break;
        case OperationType::MAX_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          maxPoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                         input.shape(),
                                         padding_left, padding_right,
                                         padding_top, padding_bottom,
                                         stride_width, stride_height,
                                         filter_width, filter_height, activation,
                                         reinterpret_cast<float*>(output.buffer),
                                         outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                        input.shape(),
                                        padding_left, padding_right,
                                        padding_top, padding_bottom,
                                        stride_width, stride_height,
                                        filter_width, filter_height, activation,
                                        reinterpret_cast<uint8_t*>(output.buffer),
                                        outShape);
            }
        } break;
        case OperationType::RELU: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          reluFloat32(reinterpret_cast<const float*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<float*>(output.buffer),
                                      outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          reluQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<uint8_t*>(output.buffer),
                                     outShape);
            }
        } break;
        case OperationType::RELU1: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu1Float32(reinterpret_cast<const float*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu1Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<uint8_t*>(output.buffer),
                                      outShape);
            }
        } break;
        case OperationType::RELU6: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu6Float32(reinterpret_cast<const float*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu6Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<uint8_t*>(output.buffer),
                                      outShape);
            }
        } break;
        case OperationType::TANH: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          tanhFloat32(reinterpret_cast<const float*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<float*>(output.buffer),
                                      outShape);
            }
        } break;
        case OperationType::LOGISTIC: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          logisticFloat32(reinterpret_cast<const float*>(input.buffer),
                                          input.shape(),
                                          reinterpret_cast<float*>(output.buffer),
                                          outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                         input.shape(),
                                         reinterpret_cast<uint8_t*>(output.buffer),
                                         outShape);
            }
        } break;
        case OperationType::SOFTMAX: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& input = mOperands[ins[0]];
            float beta = getScalarData<float>(mOperands[ins[1]]);
            if (beta <= 0.0f) {
                LOG(ERROR) << "beta must be positive for softmax";
                return ANEURALNETWORKS_BAD_DATA;
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          softmaxFloat32(reinterpret_cast<const float*>(input.buffer),
                                         input.shape(),
                                         beta,
                                         reinterpret_cast<float*>(output.buffer),
                                         output.shape());
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          softmaxQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                        input.shape(),
                                        beta,
                                        reinterpret_cast<uint8_t*>(output.buffer),
                                        output.shape());
            }
        } break;
        case OperationType::FULLY_CONNECTED: {
            if (!allParametersPresent(4, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& input   = mOperands[ins[0]];
            RunTimeOperandInfo& weights = mOperands[ins[1]];
            RunTimeOperandInfo& bias    = mOperands[ins[2]];

            int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          fullyConnectedFloat32(reinterpret_cast<const float*>(input.buffer),
                                                input.shape(),
                                                reinterpret_cast<const float*>(weights.buffer),
                                                weights.shape(),
                                                reinterpret_cast<const float*>(bias.buffer),
                                                bias.shape(),
                                                activation,
                                                reinterpret_cast<float*>(output.buffer),
                                                outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          fullyConnectedQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                               input.shape(),
                                               reinterpret_cast<const uint8_t*>(weights.buffer),
                                               weights.shape(),
                                               reinterpret_cast<const int32_t*>(bias.buffer),
                                               bias.shape(),
                                               activation,
                                               reinterpret_cast<uint8_t*>(output.buffer),
                                               outShape);
            }
        } break;
        case OperationType::CONCATENATION: {
            if (outs.size() != 1 || ins.size() < 2) {
                return ANEURALNETWORKS_BAD_DATA;
            }
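            // All inputs except the last are the tensors to join; the last
            // input is the scalar concatenation axis.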
975             int numInputTensors = ins.size() - 1;
976             int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);
977 
978             RunTimeOperandInfo& output = mOperands[outs[0]];
979             Shape outShape = output.shape();
980 
981             const RunTimeOperandInfo& firstInput = mOperands[ins[0]];
982             if (firstInput.type == OperandType::TENSOR_FLOAT32) {
983                 std::vector<Shape> inputShapes(numInputTensors);
984                 std::vector<const float*> inputDataPtrs(numInputTensors);
985 
986                 for (int i=0; i<numInputTensors; i++) {
987                     RunTimeOperandInfo& input = mOperands[ins[i]];
988                     inputShapes[i] = input.shape();
989                     inputDataPtrs[i] = reinterpret_cast<const float*>(input.buffer);
990                 }
991                 success = concatenationPrepare(inputShapes, axis, &outShape) &&
992                           setInfoAndAllocateIfNeeded(&output, outShape) &&
993                           concatenationFloat32(inputDataPtrs, inputShapes, axis,
994                                                reinterpret_cast<float*>(output.buffer), outShape);
995             } else if (firstInput.type == OperandType::TENSOR_QUANT8_ASYMM) {
996                 std::vector<Shape> inputShapes(numInputTensors);
997                 std::vector<const uint8_t*> inputDataPtrs(numInputTensors);
998 
999                 for (int i=0; i<numInputTensors; i++) {
1000                     RunTimeOperandInfo& input = mOperands[ins[i]];
1001                     inputShapes[i] = input.shape();
1002                     inputDataPtrs[i] = reinterpret_cast<const uint8_t*>(input.buffer);
1003                 }
1004                 success = concatenationPrepare(inputShapes, axis, &outShape) &&
1005                           setInfoAndAllocateIfNeeded(&output, outShape) &&
1006                           concatenationQuant8(inputDataPtrs, inputShapes, axis,
1007                                               reinterpret_cast<uint8_t*>(output.buffer),
1008                                               outShape);
1009             }
1010         } break;
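        // The normalization ops below follow the same prepare / allocate /
        // evaluate chain as above: validate the input shape, size the output
        // operand, then dispatch to the kernel for the operand's type.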
        case OperationType::L2_NORMALIZATION: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2normFloat32(reinterpret_cast<const float*>(input.buffer),
                                        input.shape(),
                                        reinterpret_cast<float*>(output.buffer),
                                        outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2normQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<uint8_t*>(output.buffer),
                                       outShape);
            }
        } break;
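        // Note: only a float32 kernel is wired up for this operation, so a
        // quantized input takes no branch, leaves success false, and is
        // reported as a failed operation below.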
        case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
            if (!allParametersPresent(5, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
            float bias = getScalarData<float>(mOperands[ins[2]]);
            float alpha = getScalarData<float>(mOperands[ins[3]]);
            float beta = getScalarData<float>(mOperands[ins[4]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          localResponseNormFloat32(reinterpret_cast<const float*>(input.buffer),
                                                   input.shape(),
                                                   radius, bias, alpha, beta,
                                                   reinterpret_cast<float*>(output.buffer),
                                                   outShape);
            }
        } break;
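        // RESHAPE reads its target dimensions from a second input tensor
        // rather than from scalar operands: ins[1] is an int32 tensor whose
        // elements are the new shape, e.g. a {6} tensor to flatten a 2x3
        // input. The copy itself is type-agnostic, hence the void* kernel.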
        case OperationType::RESHAPE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& targetShape = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = reshapePrepare(input.shape(),
                                     reinterpret_cast<const int32_t*>(targetShape.buffer),
                                     getNumberOfElements(targetShape.shape()),
                                     &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      reshapeGeneric(reinterpret_cast<const void*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<void*>(output.buffer),
                                     outShape);
        } break;
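        // RESIZE_BILINEAR takes the requested output width and height as
        // int32 scalars (ins[1] and ins[2]); as with local response
        // normalization, only a float32 path is implemented here.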
        case OperationType::RESIZE_BILINEAR: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t width = getScalarData<int32_t>(mOperands[ins[1]]);
            int32_t height = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = resizeBilinearPrepare(input.shape(),
                                                width, height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          resizeBilinearFloat32(reinterpret_cast<const float*>(input.buffer),
                                                input.shape(),
                                                reinterpret_cast<float*>(output.buffer),
                                                outShape);
            }
        } break;
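        // DEPTH_TO_SPACE and SPACE_TO_DEPTH only move elements around, so a
        // single generic kernel serves both float and quantized tensors by
        // operating on the raw uint8_t buffers directly, with no per-type
        // casts.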
        case OperationType::DEPTH_TO_SPACE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = depthToSpacePrepare(input.shape(),
                                          blockSize,
                                          &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      depthToSpaceGeneric(input.buffer,
                                          input.shape(),
                                          blockSize,
                                          output.buffer,
                                          outShape);
        } break;
        case OperationType::SPACE_TO_DEPTH: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = spaceToDepthPrepare(input.shape(),
                                          blockSize,
                                          &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      spaceToDepthGeneric(input.buffer,
                                          input.shape(),
                                          blockSize,
                                          output.buffer,
                                          outShape);
        } break;
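        // The lookup and recurrent ops below delegate to small helper classes
        // (EmbeddingLookup, HashtableLookup, LSHProjection, LSTMCell, RNN,
        // SVDF) constructed from the operation and the operand table: a
        // Prepare step computes the output shapes, and Eval() on the instance
        // runs the kernel once the outputs are allocated.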
        case OperationType::EMBEDDING_LOOKUP: {
            const RunTimeOperandInfo &values =
                mOperands[ins[EmbeddingLookup::kValueTensor]];
            const RunTimeOperandInfo &lookups =
                mOperands[ins[EmbeddingLookup::kLookupTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[EmbeddingLookup::kOutputTensor]];

            Shape outputShape;
            EmbeddingLookup lookup(operation, mOperands);

            success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                lookup.Eval();
        } break;
        case OperationType::HASHTABLE_LOOKUP: {
            const RunTimeOperandInfo &lookups =
                mOperands[ins[HashtableLookup::kLookupTensor]];
            const RunTimeOperandInfo &keys =
                mOperands[ins[HashtableLookup::kKeyTensor]];
            const RunTimeOperandInfo &values =
                mOperands[ins[HashtableLookup::kValueTensor]];

            RunTimeOperandInfo &output =
                mOperands[outs[HashtableLookup::kOutputTensor]];
            RunTimeOperandInfo &hits =
                mOperands[outs[HashtableLookup::kHitsTensor]];

            Shape outputShape, hitShape;
            HashtableLookup lookup(operation, mOperands);

            success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
                                             &outputShape, &hitShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                setInfoAndAllocateIfNeeded(&hits, hitShape) &&
                lookup.Eval();
        } break;
        case OperationType::LSH_PROJECTION: {
            RunTimeOperandInfo &output =
                mOperands[outs[LSHProjection::kOutputTensor]];

            Shape outputShape;
            LSHProjection lsh(operation, mOperands);

            success = LSHProjection::Prepare(operation, mOperands,
                                             &outputShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                lsh.Eval();
        } break;
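        // LSTM produces four outputs: a scratch buffer plus the output state,
        // cell state, and output tensors. Each must be sized and allocated
        // before lstm_cell.Eval() runs.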
        case OperationType::LSTM: {
            RunTimeOperandInfo &scratch =
                mOperands[outs[LSTMCell::kScratchBufferTensor]];
            RunTimeOperandInfo &outputStateOut =
                mOperands[outs[LSTMCell::kOutputStateOutTensor]];
            RunTimeOperandInfo &cellStateOut =
                mOperands[outs[LSTMCell::kCellStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[LSTMCell::kOutputTensor]];

            Shape scratchShape, outputStateShape, cellStateShape, outputShape;
            LSTMCell lstm_cell(operation, mOperands);

            success = LSTMCell::Prepare(operation, mOperands,
                                        &scratchShape, &outputStateShape,
                                        &cellStateShape, &outputShape) &&
                setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
                setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
                setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                lstm_cell.Eval();
        } break;
        case OperationType::RNN: {
            RunTimeOperandInfo &hiddenStateOut =
                mOperands[outs[RNN::kHiddenStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[RNN::kOutputTensor]];

            Shape hiddenStateShape, outputShape;
            RNN rnn_cell(operation, mOperands);

            success = RNN::Prepare(operation, mOperands,
                                   &hiddenStateShape, &outputShape) &&
                setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                rnn_cell.Eval();
        } break;
        case OperationType::SVDF: {
            RunTimeOperandInfo &stateOut =
                mOperands[outs[SVDF::kStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[SVDF::kOutputTensor]];

            Shape stateShape, outputShape;
            SVDF svdf(operation, mOperands);

            success = SVDF::Prepare(operation, mOperands,
                                    &stateShape, &outputShape) &&
                setInfoAndAllocateIfNeeded(&stateOut, stateShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                svdf.Eval();
        } break;
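        // An operation type with no case above should never reach execution;
        // assert rather than silently skipping it.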
        default:
            nnAssert(false);
            break;
    }
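    // Every case funnels its result through success: false means the prepare
    // step, the output allocation, or the kernel itself rejected the inputs.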
    if (!success) {
        LOG(ERROR) << getOperationName(operation.type) << " failed.";
        return ANEURALNETWORKS_OP_FAILED;
    }

    freeNoLongerUsedOperands(ins);
    return ANEURALNETWORKS_NO_ERROR;
}

} // namespace nn
} // namespace android