/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "OperationsUtils"

#include "OperationsUtils.h"

#include <algorithm>
#include <cmath>
#include <limits>
#include <sstream>
#include <vector>

#include "LegacyUtils.h"
#include "Operations.h"

namespace android {
namespace nn {

namespace {

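// Checks that exactly `operandCount` operands are present and that the type of
// each operand, as reported by `getOperandType`, matches the corresponding
// entry in `expectedTypes`. `tag` ("input" or "output") is only used in the
// error message.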
bool validateOperandTypes(const std::vector<OperandType>& expectedTypes, const char* tag,
                          uint32_t operandCount,
                          std::function<OperandType(uint32_t)> getOperandType) {
    NN_RET_CHECK_EQ(operandCount, expectedTypes.size());
    for (uint32_t i = 0; i < operandCount; ++i) {
        OperandType type = getOperandType(i);
        NN_RET_CHECK(type == expectedTypes[i])
                << "Invalid " << tag << " tensor type " << type << " for " << tag << " " << i
                << ", expected " << expectedTypes[i];
    }
    return true;
}

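// Maps a fused activation function (NONE/RELU/RELU1/RELU6) to the matching
// clamp range in the quantized domain of `outputShape`: the float bounds are
// quantized with the output scale and zero point and then intersected with
// [qmin, qmax].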
void CalculateActivationRangeImpl(int32_t activation, const Shape& outputShape, int32_t qmin,
                                  int32_t qmax, int32_t* act_min, int32_t* act_max) {
    const auto scale = outputShape.scale;
    const auto zero_point = outputShape.offset;

    auto quantize = [scale, zero_point](float f) {
        return zero_point + static_cast<int32_t>(std::round(f / scale));
    };

    if (activation == kActivationRelu) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = qmax;
    } else if (activation == kActivationRelu6) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = std::min(qmax, quantize(6.0));
    } else if (activation == kActivationRelu1) {
        *act_min = std::max(qmin, quantize(-1.0));
        *act_max = std::min(qmax, quantize(1.0));
    } else if (activation == kActivationNone) {
        *act_min = qmin;
        *act_max = qmax;
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

}  // namespace

bool validateInputTypes(const IOperationValidationContext* context,
                        const std::vector<OperandType>& expectedTypes) {
    return validateOperandTypes(expectedTypes, "input", context->getNumInputs(),
                                [context](uint32_t index) { return context->getInputType(index); });
}

bool validateOutputTypes(const IOperationValidationContext* context,
                         const std::vector<OperandType>& expectedTypes) {
    return validateOperandTypes(
            expectedTypes, "output", context->getNumOutputs(),
            [context](uint32_t index) { return context->getOutputType(index); });
}

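// Fails validation with a descriptive message (listing the operation name and
// its input/output operand types) when the operation requires a newer version
// than the one the context is being validated against.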
bool validateVersion(const IOperationValidationContext* context, Version contextVersion,
                     Version minSupportedVersion) {
    if (contextVersion < minSupportedVersion) {
        std::ostringstream message;
        message << "Operation " << context->getOperationName() << " with inputs {";
        for (uint32_t i = 0, n = context->getNumInputs(); i < n; ++i) {
            if (i != 0) {
                message << ", ";
            }
            message << context->getInputType(i);
        }
        message << "} and outputs {";
        for (uint32_t i = 0, n = context->getNumOutputs(); i < n; ++i) {
            if (i != 0) {
                message << ", ";
            }
            message << context->getOutputType(i);
        }
        message << "} is only supported since " << minSupportedVersion << " (validating using "
                << contextVersion << ")";
        NN_RET_CHECK_FAIL() << message.str();
    }
    return true;
}

bool SameShape(const Shape& in1, const Shape& in2) {
    if (in1.type != in2.type || in1.dimensions.size() != in2.dimensions.size()) {
        return false;
    }
    for (size_t i = 0; i < in1.dimensions.size(); i++) {
        if (in1.dimensions[i] != in2.dimensions[i]) {
            return false;
        }
    }
    return true;
}

bool SetShape(const Shape& in, Shape* out) {
    if (in.type != out->type) {
        return false;
    }
    out->dimensions = in.dimensions;
    return true;
}

uint32_t getNumberOfElements(const Shape& shape) {
    uint32_t count = 1;
    for (size_t i = 0; i < shape.dimensions.size(); i++) {
        count *= shape.dimensions[i];
    }
    return count;
}

uint32_t getNumberOfElements(const Shape& shape, size_t firstAxisInclusive,
                             size_t lastAxisExclusive) {
    nnAssert(0 <= firstAxisInclusive);
    nnAssert(firstAxisInclusive <= lastAxisExclusive);
    nnAssert(lastAxisExclusive <= shape.dimensions.size());
    uint32_t count = 1;
    for (size_t i = firstAxisInclusive; i < lastAxisExclusive; i++) {
        count *= shape.dimensions[i];
    }
    return count;
}

uint32_t getNumberOfDimensions(const Shape& shape) {
    return shape.dimensions.size();
}

uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) {
    nnAssert(0 <= dimensionIdx && dimensionIdx < shape.dimensions.size());
    return shape.dimensions[dimensionIdx];
}

uint32_t hasKnownRank(const Shape& shape) {
    return !shape.dimensions.empty();
}

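// Maps a possibly-negative axis index to its non-negative equivalent and
// checks that it is in range. For example, with numberOfDimensions == 4,
// axis -1 becomes 3 (the last dimension).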
bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis) {
    NN_CHECK(-numberOfDimensions <= *axis && *axis < numberOfDimensions);
    if (*axis < 0) {
        *axis += numberOfDimensions;
    }
    return true;
}

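// Decomposes double_multiplier into a Q0.31 fixed-point significand and a
// power-of-two exponent such that
//   double_multiplier ~= quantized_multiplier * 2^(shift - 31).
// Multipliers whose magnitude is below roughly 2^-31 are flushed to zero, as
// explained in the comment inside the function.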
bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, int32_t* shift) {
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *shift = 0;
        return true;
    }
    const double q = std::frexp(double_multiplier, shift);
    auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
    NN_RET_CHECK(q_fixed <= (1ll << 31));
    if (q_fixed == (1ll << 31)) {
        q_fixed /= 2;
        ++*shift;
    }
    NN_RET_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
    // A shift amount smaller than -31 would cause all bits to be shifted out
    // and thus all results would be zero. We implement that instead with
    // q_fixed==0, so as to avoid hitting issues with right-shift
    // operations with shift amounts greater than 31. Note that this happens
    // roughly when abs(double_multiplier) < 2^-31 and the present handling means
    // that we're effectively flushing tiny double_multiplier's to zero.
    // We could conceivably handle values in the range (roughly) [32, 63]
    // as 'denormals' i.e. (shift==0, q_fixed < 2^30). From that point of view
    // the present handling is just doing 'flush denormals to zero'. We could
    // reconsider and actually generate nonzero denormals if a need arises.
    if (*shift < -31) {
        *shift = 0;
        q_fixed = 0;
    }
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

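// Variant of QuantizeMultiplier restricted to multipliers in the open interval
// (0, 1); the resulting exponent (*left_shift) is verified to be <= 0.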
bool QuantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t* quantized_multiplier,
                                         int32_t* left_shift) {
    NN_RET_CHECK(double_multiplier > 0.);
    NN_RET_CHECK(double_multiplier < 1.);
    NN_RET_CHECK(QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift));
    NN_RET_CHECK(*left_shift <= 0);
    return true;
}

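// Decomposes a multiplier in [0, 1) into a Q0.31 significand and a
// non-negative right shift such that
//   double_multiplier ~= quantized_multiplier * 2^(-31 - right_shift).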
bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int32_t* right_shift) {
    NN_OPS_CHECK(double_multiplier >= 0.);
    NN_OPS_CHECK(double_multiplier < 1.);
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *right_shift = 0;
        return true;
    }
    NN_OPS_CHECK(double_multiplier > 0.);
    const double q = std::frexp(double_multiplier, right_shift);
    *right_shift *= -1;
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        --*right_shift;
    }
    NN_OPS_CHECK(*right_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

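// Decomposes a multiplier greater than 1 into a Q0.31 significand and a
// non-negative left shift such that
//   double_multiplier ~= quantized_multiplier * 2^(left_shift - 31).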
bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int* left_shift) {
    NN_OPS_CHECK(double_multiplier > 1.);
    const double q = std::frexp(double_multiplier, left_shift);
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        ++*left_shift;
    }
    NN_OPS_CHECK(*left_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

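// Computes the real multiplier for a quantized convolution,
//   multiplier = (input_scale * filter_scale) / output_scale,
// after checking that the bias scale is (approximately) equal to
// input_scale * filter_scale.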
bool GetQuantizedConvolutionMultipler(const Shape& inputShape, const Shape& filterShape,
                                      const Shape& biasShape, const Shape& outputShape,
                                      double* multiplier) {
    // Upcast bias and input_product to double
    const double input_product_scale = inputShape.scale * filterShape.scale;
    const double bias_scale = biasShape.scale;

    // The following conditions must be guaranteed by the training pipeline.
    NN_OPS_CHECK(std::abs(input_product_scale - bias_scale) <=
                 1e-6 * std::min(input_product_scale, bias_scale));
    NN_OPS_CHECK(input_product_scale >= 0);
    *multiplier = input_product_scale / outputShape.scale;
    return true;
}

void CalculateActivationRangeUint8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                   int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<uint8_t>::min();
    const int32_t qmax = std::numeric_limits<uint8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}

void CalculateActivationRangeInt8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                  int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<int8_t>::min();
    const int32_t qmax = std::numeric_limits<int8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}

void CalculateActivationRangeFloat(int32_t activation, float* activation_min,
                                   float* activation_max) {
    if (activation == kActivationRelu) {
        *activation_min = 0.f;
        *activation_max = std::numeric_limits<float>::max();
    } else if (activation == kActivationRelu6) {
        *activation_min = 0.f;
        *activation_max = 6.f;
    } else if (activation == kActivationRelu1) {
        *activation_min = -1.f;
        *activation_max = 1.f;
    } else if (activation == kActivationNone) {
        *activation_min = std::numeric_limits<float>::lowest();
        *activation_max = std::numeric_limits<float>::max();
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

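// Computes the 'input radius': the largest input magnitude that a fixed-point
// kernel with `input_integer_bits` integer bits and a left shift of
// `input_left_shift` can rescale without overflowing 32-bit arithmetic.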
int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) {
    const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                      (1LL << (31 - input_integer_bits)) /
                                      (1LL << input_left_shift);
    // Tighten bound using floor.  Suppose that we could use the exact value.
    // After scaling the difference, the result would be at the maximum.  Thus we
    // must ensure that our value has lower magnitude.
    return static_cast<int32_t>(std::floor(max_input_rescaled));
}

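// Converts an implicit padding scheme into explicit head/tail padding amounts.
// For kPaddingSame the padding is chosen so that the output keeps
// ceil(in_size / stride) elements; any other scheme (e.g. kPaddingValid)
// leaves both amounts at zero. Within the SAME scheme, transposed convolution
// recomputes the tail padding so that it fits the end of the last stride.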
void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor,
                                  int32_t filter_size, int32_t padding_implicit,
                                  bool isTransposeConv, int32_t* padding_head,
                                  int32_t* padding_tail) {
    *padding_head = 0;
    *padding_tail = 0;

    int32_t effective_filter_size = (filter_size - 1) * dilation_factor + 1;

    if (padding_implicit == kPaddingSame) {
        int32_t out_size = (in_size + stride - 1) / stride;
        int32_t tmp = (out_size - 1) * stride + effective_filter_size;
        if (tmp > in_size) {
            *padding_head = (tmp - in_size) / 2;
            *padding_tail = (tmp - in_size) - *padding_head;
        }
        // For transpose conv, make padding tail fit tightly to the end of the last stride.
        if (isTransposeConv) {
            *padding_tail = (tmp - in_size) - *padding_head;
        }
    }
}

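// Computes the shape produced by NumPy-style broadcasting of two tensors,
// matching dimensions from the trailing end; each pair of dimensions must be
// equal or one of them must be 1. For example, shapes {2, 1, 4} and {3, 1}
// broadcast to {2, 3, 4}.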
bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out) {
    NN_RET_CHECK(in1.type == in2.type);
    uint32_t numberOfDims1 = getNumberOfDimensions(in1);
    uint32_t numberOfDims2 = getNumberOfDimensions(in2);
    uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
    out->dimensions = std::vector<uint32_t>(maxDims);
    for (uint32_t i = 1; i <= maxDims; i++) {
        uint32_t dim1 = 1;
        if (i <= numberOfDims1) {
            dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
        }
        uint32_t dim2 = 1;
        if (i <= numberOfDims2) {
            dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
        }
        if (dim1 != dim2 && dim1 != 1 && dim2 != 1) {
            LOG(ERROR) << "Dimensions mismatch for broadcast:\n"
                       << "First tensor: dimension " << numberOfDims1 - i << " of size " << dim1
                       << "\nSecond tensor: dimension " << numberOfDims2 - i << " of size " << dim2;
            return false;
        }
        out->dimensions[maxDims - i] = (dim1 == 1) ? dim2 : dim1;
    }
    return true;
}

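// Re-expresses a quantized value in a different quantization scheme: the value
// is dequantized with oldShape's scale and zero point, requantized with
// newShape's, and saturated to the output type's representable range.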
template <>
uint8_t requantize<uint8_t>(uint8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < 0) return 0;
    if (doubleRet > 255) return 255;
    return static_cast<uint8_t>(std::round(doubleRet));
}

template <>
int8_t requantize<int8_t>(int8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < -128) return -128;
    if (doubleRet > 127) return 127;
    return static_cast<int8_t>(std::round(doubleRet));
}

bool reshapePrepare(const Shape& input, const int32_t* targetDims, const int32_t targetDimsSize,
                    Shape* output) {
    // Reshape allows one of the targetDims components to have the
    // special -1 value, meaning it will be calculated automatically based on the
    // input. Here we calculate what that dimension should be so that the number
    // of output elements is the same as the number of input elements.
    int32_t numInputElements = (int32_t)getNumberOfElements(input);

    std::vector<uint32_t> outDims(targetDimsSize);
    int32_t numOutputElements = 1;
    int32_t strechDim = -1;
    for (int32_t i = 0; i < targetDimsSize; ++i) {
        int32_t value = targetDims[i];
        if (value == -1) {
            NN_OPS_CHECK(strechDim == -1);
            strechDim = i;
        } else {
            numOutputElements *= value;
            outDims[i] = (uint32_t)value;
        }
    }
    if (strechDim != -1) {
        int32_t strechValue = numInputElements / numOutputElements;
        outDims[strechDim] = (uint32_t)strechValue;
        numOutputElements *= strechValue;
    }

    NN_OPS_CHECK(numInputElements == numOutputElements);

    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool depthToSpacePrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(channels % (blockSize * blockSize) == 0);
    output->type = input.type;
    output->dimensions = {batches, height * blockSize, width * blockSize,
                          channels / (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool spaceToDepthPrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(height % blockSize == 0);
    NN_OPS_CHECK(width % blockSize == 0);

    output->type = input.type;
    output->dimensions = {batches, height / blockSize, width / blockSize,
                          channels * (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool embeddingLookupPrepare(const Shape& valueShape, const Shape& lookupShape, Shape* outputShape) {
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 2);
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);

    const uint32_t columns = getSizeOfDimension(valueShape, 1);
    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);

    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups, columns};
    for (uint32_t i = 2; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    return true;
}

bool hashtableLookupPrepare(const Shape& lookupShape, const Shape& keyShape,
                            const Shape& valueShape, Shape* outputShape, Shape* hitShape) {
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(keyShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 1);

    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);
    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups};
    for (uint32_t i = 1; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    hitShape->type = OperandType::TENSOR_QUANT8_ASYMM;
    hitShape->dimensions = {lookups};
    hitShape->offset = 0;
    hitShape->scale = 1.f;

    return true;
}

bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
                Shape* output) {
    uint32_t numInputDims = getNumberOfDimensions(input);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == numInputDims);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    std::vector<uint32_t> outDims(numInputDims);
    for (uint32_t i = 0; i < numInputDims; ++i) {
        int32_t beforePadding = *paddingsData++;
        int32_t afterPadding = *paddingsData++;
        // Pad values have to be greater than or equal to 0.
        NN_OPS_CHECK(beforePadding >= 0 && afterPadding >= 0);
        outDims[i] = beforePadding + getSizeOfDimension(input, i) + afterPadding;
    }
    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool batchToSpacePrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(batches % (blockSizeData[0] * blockSizeData[1]) == 0);
    output->type = input.type;
    output->dimensions = {batches / (blockSizeData[0] * blockSizeData[1]),
                          height * blockSizeData[0], width * blockSizeData[1], channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool spaceToBatchPrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, const int32_t* paddingsData,
                         const Shape& paddingsShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    uint32_t paddedHeight = paddingsData[0] + height + paddingsData[1];
    uint32_t paddedWidth = paddingsData[2] + width + paddingsData[3];

    NN_OPS_CHECK(paddedHeight % blockSizeData[0] == 0);
    NN_OPS_CHECK(paddedWidth % blockSizeData[1] == 0);

    output->type = input.type;
    output->dimensions = {batches * (blockSizeData[0] * blockSizeData[1]),
                          paddedHeight / blockSizeData[0], paddedWidth / blockSizeData[1],
                          channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool meanPrepare(const Shape& input, const int32_t* axisData, const Shape& axisShape, bool keepDims,
                 Shape* output) {
    // The axis tensor needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(axisShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(axisShape) == 1);

    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(input));
    int32_t axisSize = static_cast<int32_t>(getSizeOfDimension(axisShape, 0));

    // Determines size of output tensor.
    if (keepDims) {
        std::vector<uint32_t> outDims(numInputDims);
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    isAxis = true;
                    break;
                }
            }
            if (isAxis) {
                outDims[idx] = 1;
            } else {
                outDims[idx] = getSizeOfDimension(input, idx);
            }
        }
        output->dimensions = outDims;
    } else {
        // Calculates size of reducing axis.
        int32_t numReduceAxis = axisSize;
        for (int32_t i = 0; i < axisSize; ++i) {
            int32_t current = axisData[i];
            if (current < 0) {
                current += numInputDims;
            }
            NN_OPS_CHECK(current >= 0 && current < numInputDims);
            for (int32_t j = 0; j < i; ++j) {
                int32_t previous = axisData[j];
                if (previous < 0) {
                    previous += numInputDims;
                }
                if (current == previous) {
                    --numReduceAxis;
                    break;
                }
            }
        }
        // Determines output dimensions.
        std::vector<uint32_t> outDims(numInputDims - numReduceAxis);
        int32_t numSkipAxis = 0;
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    ++numSkipAxis;
                    isAxis = true;
                    break;
                }
            }
            if (!isAxis) {
                outDims[idx - numSkipAxis] = getSizeOfDimension(input, idx);
            }
        }
        // Handle the case when all dimensions are removed
        if (outDims.empty()) {
            outDims.push_back(1);
        }
        output->dimensions = outDims;
    }

    output->type = input.type;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

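// Prepares the output shape for ARGMIN/ARGMAX: an int32 tensor with the same
// dimensions as the input except that the reduced axis is removed (rank-1
// inputs produce a single-element output).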
bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    output->type = OperandType::TENSOR_INT32;

    // Copy the input dimensions, omitting the axis dimension.
    output->dimensions.clear();
    if (getNumberOfDimensions(input) > 1) {
        output->dimensions.reserve(getNumberOfDimensions(input) - 1);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin(),
                                  input.dimensions.begin() + axis);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin() + axis + 1,
                                  input.dimensions.end());
    } else {
        output->dimensions.push_back(1);
    }

    return true;
}

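// Prepares the output shapes for SPLIT: the axis dimension must divide evenly
// into numOutputs slices; each output keeps the input's type, quantization
// parameters, and all other dimensions.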
bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs,
                  std::vector<Shape>* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    const int32_t sizeOfAxisToSplit = input.dimensions[axis];
    NN_OPS_CHECK(sizeOfAxisToSplit % numOutputs == 0);
    const int32_t sliceSize = sizeOfAxisToSplit / numOutputs;

    for (int i = 0; i < numOutputs; ++i) {
        output->at(i).type = input.type;
        output->at(i).dimensions = input.dimensions;
        output->at(i).dimensions[axis] = sliceSize;
        output->at(i).offset = input.offset;
        output->at(i).scale = input.scale;
    }
    return true;
}

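// Validates the operand types and NHWC shapes for GROUPED_CONV_2D and computes
// the output shape. The filter's input-channel dimension times numGroups must
// equal the input's channel count, and the number of output channels must be
// divisible by numGroups.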
bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
                        int32_t numGroups, Shape* output) {
    if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
        NN_OPS_CHECK(input.type == OperandType::TENSOR_QUANT8_ASYMM ||
                     input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED);
    } else {
        NN_OPS_CHECK(input.type == filter.type);
    }
    if (input.type == OperandType::TENSOR_QUANT8_ASYMM ||
        input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
        NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32);
    } else {
        NN_OPS_CHECK(input.type == bias.type);
    }
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(filter) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(bias) == 1);

    NN_OPS_CHECK(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0));

    NN_OPS_CHECK(getSizeOfDimension(filter, 3) * numGroups == getSizeOfDimension(input, 3));
    NN_OPS_CHECK(getSizeOfDimension(filter, 0) % numGroups == 0);

    uint32_t channels_out = getSizeOfDimension(filter, 0);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t filterWidth = getSizeOfDimension(filter, 2);
    uint32_t filterHeight = getSizeOfDimension(filter, 1);
    uint32_t batches = getSizeOfDimension(input, 0);

    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_left);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_right);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_top);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_bottom);

    uint32_t outWidth =
            computeOutSize(width, filterWidth, stride_width, padding_left, padding_right);
    uint32_t outHeight =
            computeOutSize(height, filterHeight, stride_height, padding_top, padding_bottom);

    output->type = input.type;
    output->dimensions = {batches, outHeight, outWidth, channels_out};
    return true;
}

}  // namespace nn
}  // namespace android