///
/// Copyright (c) 2021-2022 Arm Limited.
///
/// SPDX-License-Identifier: MIT
///
/// Permission is hereby granted, free of charge, to any person obtaining a copy
/// of this software and associated documentation files (the "Software"), to
/// deal in the Software without restriction, including without limitation the
/// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
/// sell copies of the Software, and to permit persons to whom the Software is
/// furnished to do so, subject to the following conditions:
///
/// The above copyright notice and this permission notice shall be included in all
/// copies or substantial portions of the Software.
///
/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/// SOFTWARE.
///
namespace arm_compute
{
/**
@page operators_list Supported Operators
@tableofcontents
@section S9_1_operators_list Supported Operators
Compute Library supports operators that are listed in the table below.
Compute Library supports a wide list of data-types; detailed information can be found directly in the documentation of each kernel/function.
The main data-types that the Machine Learning functions support are the following:
| Function
| Description
| Equivalent Android NNAPI Op
| Backends
| Data Layouts
| Data Types
|
| ActivationLayer
| Function to simulate an activation layer with the specified activation function.
|
- ANEURALNETWORKS_ELU
- ANEURALNETWORKS_HARD_SWISH
- ANEURALNETWORKS_LOGISTIC
- ANEURALNETWORKS_RELU
- ANEURALNETWORKS_RELU1
- ANEURALNETWORKS_RELU6
- ANEURALNETWORKS_TANH
| NEActivationLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16
| | F16 | F16
| | F32 | F32
|
|
| CLActivationLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16
| | F16 | F16
| | F32 | F32
|
|
| ArgMinMaxLayer
| Function to calculate the index of the minimum or maximum values in a tensor based on an axis.
|
- ANEURALNETWORKS_ARGMAX
- ANEURALNETWORKS_ARGMIN
| NEArgMinMaxLayer
|
|
| src | dst
|
|---|
| QASYMM8 | U32, S32
| | QASYMM8_SIGNED | U32, S32
| | S32 | U32, S32
| | F16 | U32, S32
| | F32 | U32, S32
|
|
| CLArgMinMaxLayer
|
|
| src | dst
|
|---|
| QASYMM8 | U32, S32
| | QASYMM8_SIGNED | U32, S32
| | S32 | U32, S32
| | F16 | U32, S32
| | F32 | U32, S32
|
|
| ArithmeticAddition
| Function to add 2 tensors.
|
| NEArithmeticAddition
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16 | QASYMM16
| | QSYMM16 | QSYMM16 | S32
| | U8 | U8 | U8
| | S16 | S16 | S16
| | S32 | S32 | S32
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| ArithmeticSubtraction
| Function to subtract 2 tensors.
|
| NEArithmeticSubtraction
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16 | QASYMM16
| | QSYMM16 | QSYMM16 | S32
| | U8 | U8 | U8
| | S16 | S16 | S16
| | S32 | S32 | S32
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| BatchNormalizationLayer
| Function to perform batch normalization.
|
| NEBatchNormalizationLayer
|
|
|
| CLBatchNormalizationLayer
|
|
|
| BatchToSpaceLayer
| Batch to space transformation.
|
- ANEURALNETWORKS_BATCH_TO_SPACE_ND
| NEBatchToSpaceLayer
|
|
|
| CLBatchToSpaceLayer
|
|
|
| BitwiseAnd
| Function to perform bitwise AND between 2 tensors.
|
- ANEURALNETWORKS_LOGICAL_AND
| NEBitwiseAnd
|
|
|
| CLBitwiseAnd
|
|
|
| BitwiseNot
| Function to perform bitwise NOT.
|
- ANEURALNETWORKS_LOGICAL_NOT
| NEBitwiseNot
|
|
|
| CLBitwiseNot
|
|
|
| BitwiseOr
| Function to perform bitwise OR between 2 tensors.
|
- ANEURALNETWORKS_LOGICAL_OR
| NEBitwiseOr
|
|
|
| CLBitwiseOr
|
|
|
| BitwiseXor
| Function to perform bitwise XOR between 2 tensors.
|
| NEBitwiseXor
|
|
|
| CLBitwiseXor
|
|
|
| BoundingBoxTransform
| Transform proposal bounding boxes to target bounding box using bounding box deltas.
|
| NEBoundingBoxTransform
|
|
| src0 | src1 | dst
|
|---|
| QASYMM16 | QASYMM8 | QASYMM16
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| CLBoundingBoxTransform
|
|
| src0 | src1 | dst
|
|---|
| QASYMM16 | QASYMM8 | QASYMM16
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| Cast
| Function to cast a tensor.
|
| NECast
|
|
| src | dst
|
|---|
| QASYMM8_SIGNED | S16, S32, F32, F16
| | QASYMM8 | U16, S16, S32, F32, F16
| | U8 | U16, S16, S32, F32, F16
| | U16 | U8, U32
| | S16 | QASYMM8_SIGNED, U8, S32
| | F16 | QASYMM8_SIGNED, QASYMM8, F32, S32, U8
| | S32 | QASYMM8_SIGNED, QASYMM8, F16, F32, U8
| | F32 | QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8
|
|
| CLCast
|
|
| src | dst
|
|---|
| U8 | S8, U16, S16, U32, S32, F16, F32
| | U16 | U8, S8, S16, U32, S32, F16, F32
| | S16 | U8, S8, U16, U32, S32, F16, F32
| | U32 | U8, S8, U16, S16, S32, F16, F32
| | S32 | U8, S8, U16, S16, U32, F16, F32
| | F16 | U8, S8, U16, S16, U32, F32
| | F32 | U8, S8, U16, S16, U32, F16
|
|
| ChannelShuffleLayer
| Function to shuffle the channels of the input tensor.
|
- ANEURALNETWORKS_CHANNEL_SHUFFLE
| NEChannelShuffleLayer
|
|
|
| CLChannelShuffleLayer
|
|
|
| Comparison
| Function to compare 2 tensors.
|
- ANEURALNETWORKS_EQUAL
- ANEURALNETWORKS_GREATER
- ANEURALNETWORKS_GREATER_EQUAL
- ANEURALNETWORKS_LESS
- ANEURALNETWORKS_LESS_EQUAL
- ANEURALNETWORKS_NOT_EQUAL
| CLComparison
|
|
|
| ConcatenateLayer
| Function to concatenate tensors along a given axis.
|
- ANEURALNETWORKS_CONCATENATION
| NEConcatenateLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| CLConcatenateLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| ConvertFullyConnectedWeights
| Function to transpose the weights for the fully connected layer.
|
| NEConvertFullyConnectedWeights
|
|
|
| CLConvertFullyConnectedWeights
|
|
|
| ConvolutionLayer
| Function to compute a convolution layer.
|
| NEConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
|
|
| CLConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
|
|
| Conv3D
| Function to compute a 3d convolution layer.
|
| NEConv3D
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
|
|
| CLConv3D
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
|
|
| Copy
| Function to copy a tensor.
|
| NECopy
|
|
|
| CLCopy
|
|
|
| Crop
| Performs a copy of input tensor to the output tensor.
|
| CLCrop
|
|
|
| CropResize
| Function to perform cropping and resizing.
|
| NECropResize
|
|
| src0 | src1 | src2 | dst
|
|---|
| All | F32 | F32 | F32
|
|
| CLCropResize
|
|
| src0 | src1 | src2 | dst
|
|---|
| All | F32 | F32 | F32
|
|
| DeconvolutionLayer
| Function to compute a deconvolution or transpose convolution.
|
- ANEURALNETWORKS_TRANSPOSE_CONV_2D
| NEDeconvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
|
|
| CLDeconvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
|
|
| DeconvolutionLayerUpsample
| Function to execute deconvolution upsample on OpenCL.
|
- ANEURALNETWORKS_TRANSPOSE_CONV_2D
| CLDeconvolutionLayerUpsample
|
|
|
| DepthConvertLayer
| Performs a down-scaling depth conversion.
|
| NEDepthConvertLayer
|
|
| src | dst
|
|---|
| QASYMM8 | F16, F32
| | U8 | U16, S16, S32
| | U16 | U8, U32
| | S16 | U8, S32
| | BFLOAT16 | F32
| | F16 | QASYMM8, F32
| | F32 | QASYMM8, F16, BFLOAT16
|
|
| CLDepthConvertLayer
|
|
| src | dst
|
|---|
| U8 | S8, U16, S16, U32, S32, F16, F32
| | U16 | U8, S8, S16, U32, S32, F16, F32
| | S16 | U8, S8, U16, U32, S32, F16, F32
| | U32 | U8, S8, U16, S16, S32, F16, F32
| | S32 | U8, S8, U16, S16, U32, F16, F32
| | F16 | U8, S8, U16, S16, U32, F32
| | F32 | U8, S8, U16, S16, U32, F16
|
|
| DepthToSpaceLayer
| Depth to Space transformation.
|
- ANEURALNETWORKS_DEPTH_TO_SPACE
| NEDepthToSpaceLayer
|
|
|
| CLDepthToSpaceLayer
|
|
|
| DepthwiseConvolutionLayer
| Function to perform depthwise separable convolution.
|
- ANEURALNETWORKS_DEPTHWISE_CONV_2D
| NEDepthwiseConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
|
|
| CLDepthwiseConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
|
|
| DequantizationLayer
| Function to dequantize the values in a tensor.
|
- ANEURALNETWORKS_DEQUANTIZE
| NEDequantizationLayer
|
|
| src | dst
|
|---|
| QASYMM8 | F16, F32
| | QASYMM8_SIGNED | F16, F32
| | QSYMM8_PER_CHANNEL | F16, F32
| | QSYMM8 | F16, F32
| | QSYMM16 | F16, F32
|
|
| CLDequantizationLayer
|
|
| src | dst
|
|---|
| QASYMM8 | F16, F32
| | QASYMM8_SIGNED | F16, F32
| | QSYMM8_PER_CHANNEL | F16, F32
| | QSYMM8 | F16, F32
| | QSYMM16 | F16, F32
|
|
| DetectionPostProcessLayer
| Function to generate the detection output based on center size encoded boxes, class prediction and anchors by doing non-maximum suppression (NMS).
|
- ANEURALNETWORKS_DETECTION_POSTPROCESSING
| NEDetectionPostProcessLayer
|
|
| src0 - src2 | dst0 - dst3
|
|---|
| QASYMM8 | F32
| | QASYMM8_SIGNED | F32
| | F32 | F32
|
|
| DirectConvolutionLayer
| Function to compute direct convolution.
|
| NEDirectConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
|
|
| CLDirectConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
|
|
| DirectDeconvolutionLayer
| Function to run the deconvolution layer.
|
- ANEURALNETWORKS_TRANSPOSE_CONV_2D
| CLDirectDeconvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
|
|
| ElementwiseOperations
| Function to perform in Cpu: - Div - Max - Min - Pow - SquaredDiff - Comparisons (Equal, greater, greater_equal, less, less_equal, not_equal) Function to perform in CL: - Add - Sub - Div - Max - Min - Pow - SquaredDiff
|
- ANEURALNETWORKS_MAXIMUM
- ANEURALNETWORKS_MINIMUM
- ANEURALNETWORKS_POW
- ANEURALNETWORKS_DIV
- ANEURALNETWORKS_ADD
- ANEURALNETWORKS_SUB
- ANEURALNETWORKS_EQUAL
- ANEURALNETWORKS_GREATER
- ANEURALNETWORKS_GREATER_EQUAL
- ANEURALNETWORKS_LESS
- ANEURALNETWORKS_LESS_EQUAL
- ANEURALNETWORKS_NOT_EQUAL
| NEElementwiseMax
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | S32 | S32 | S32
| | S16 | S16 | S16
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| NEElementwiseMin
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | S32 | S32 | S32
| | S16 | S16 | S16
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| NEElementwiseSquaredDiff
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | S32 | S32 | S32
| | S16 | S16 | S16
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| NEElementwiseDivision
|
|
| src0 | src1 | dst
|
|---|
| F16 | F16 | F16
| | F32 | F32 | F32
|
|
| NEElementwisePower
|
|
| src0 | src1 | dst
|
|---|
| F16 | F16 | F16
| | F32 | F32 | F32
|
|
| NEElementwiseComparison
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | U8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | U8
| | S32 | S32 | U8
| | U8 | U8 | U8
| | S16 | S16 | U8
| | F16 | F16 | U8
| | F32 | F32 | U8
|
|
| CLArithmeticAddition
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16 | QASYMM16
| | U8 | U8 | U8
| | U8 | U8 | S16
| | U8 | S16 | S16
| | S16 | U8 | S16
| | S16 | S16 | S16
| | S32 | S32 | S32
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| CLArithmeticSubtraction
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16 | QASYMM16
| | U8 | U8 | U8
| | U8 | U8 | S16
| | U8 | S16 | S16
| | S16 | U8 | S16
| | S16 | S16 | S16
| | S32 | S32 | S32
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| CLArithmeticDivision
|
|
| src0 | src1 | dst
|
|---|
| F16 | F16 | F16
| | F32 | F32 | F32
|
|
| CLElementwiseMax
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16 | QASYMM16
| | U8 | U8 | U8
| | S16 | S16 | S16
| | S32 | S32 | S32
| | U32 | U32 | U32
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| CLElementwiseMin
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16 | QASYMM16
| | U8 | U8 | U8
| | S16 | S16 | S16
| | S32 | S32 | S32
| | U32 | U32 | U32
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| CLElementwiseSquaredDiff
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16 | QASYMM16
| | U8 | U8 | U8
| | S16 | S16 | S16
| | F16 | F16 | F16
| | F32 | F32 | F32
|
|
| CLElementwisePower
|
|
| src0 | src1 | dst
|
|---|
| F16 | F16 | F16
| | F32 | F32 | F32
|
|
| ElementwiseUnaryLayer
| Function to perform: - Rsqrt - Exp - Neg - Log - Abs - Round - Sin
|
- ANEURALNETWORKS_ABS
- ANEURALNETWORKS_EXP
- ANEURALNETWORKS_LOG
- ANEURALNETWORKS_NEG
- ANEURALNETWORKS_RSQRT
- ANEURALNETWORKS_SIN
| NEElementwiseUnaryLayer
|
|
| src | dst
|
|---|
| F16 | F16
| | F32 | F32
| | S32 | S32
|
|
| CLRsqrtLayer
|
|
|
| CLExpLayer
|
|
|
| CLNegLayer
|
|
| src | dst
|
|---|
| F16 | F16
| | F32 | F32
| | S32 | S32
|
|
| CLSinLayer
|
|
|
| CLLogLayer
|
|
|
| CLAbsLayer
|
|
|
| CLRoundLayer
|
|
|
| FFT1D
| Fast Fourier Transform 1D.
|
| NEFFT1D
|
|
|
| CLFFT1D
|
|
|
| FFT2D
| Fast Fourier Transform 2D.
|
| NEFFT2D
|
|
|
| CLFFT2D
|
|
|
| FFTConvolutionLayer
| Fast Fourier Transform Convolution.
|
| NEFFTConvolutionLayer
|
|
|
| CLFFTConvolutionLayer
|
|
|
| Fill
| Set the values of a tensor with a given value.
|
| NEFill
|
|
|
| CLFill
|
|
|
| FillBorder
| Function to fill the borders within the XY-planes.
|
| NEFillBorder
|
|
|
| FlattenLayer
| Reshape a tensor to be 1D
|
| NEFlattenLayer
|
|
|
| CLFlattenLayer
|
|
|
| Floor
| Round the value to the lowest number.
|
| NEFloor
|
|
|
| CLFloor
|
|
|
| FullyConnectedLayer
| Function to perform a fully connected / dense layer.
|
- ANEURALNETWORKS_FULLY_CONNECTED
| NEFullyConnectedLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
|
|
| CLFullyConnectedLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
|
|
| FuseBatchNormalization
| Function to fuse the batch normalization node to a preceding convolution node.
|
| NEFuseBatchNormalization
|
|
|
| CLFuseBatchNormalization
|
|
|
| Gather
| Performs the Gather operation along the chosen axis.
|
| NEGather
|
|
|
| CLGather
|
|
|
| GEMM
| General Matrix Multiplication.
|
| NEGEMM
|
|
| src0 | src1 | src2 | dst
|
|---|
| F32 | F32 | F32 | F32
| | F16 | F16 | F16 | F16
| | BFLOAT16 | BFLOAT16 | BFLOAT16 | BFLOAT16
|
|
| CLGEMM
|
|
| src0 | src1 | src2 | dst
|
|---|
| F32 | F32 | F32 | F32
| | F16 | F16 | F16 | F16
|
|
| GEMMConv2d
| General Matrix Multiplication.
|
| NEGEMMConv2d
|
|
| src0 | src1 | src2 | dst
|
|---|
| QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | BFLOAT16 | BFLOAT16 | BFLOAT16 | BFLOAT16
|
|
| GEMMConvolutionLayer
| General Matrix Multiplication.
|
| NEGEMMConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | BFLOAT16 | BFLOAT16 | BFLOAT16 | BFLOAT16
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
|
|
| CLGEMMConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
|
|
| GEMMDeconvolutionLayer
| General Matrix Multiplication.
|
- ANEURALNETWORKS_TRANSPOSE_CONV_2D
| CLGEMMDeconvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
|
|
| GEMMLowpMatrixMultiplyCore
| General Matrix Multiplication.
|
| NEGEMMLowpMatrixMultiplyCore
|
|
| src0 | src1 | src2 | dst
|
|---|
| QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8 | QSYMM8 | S32 | QASYMM8
| | QASYMM8 | QASYMM8 | S32 | S32
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | S32
| | QASYMM8 | QSYMM8 | S32 | S32
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8 | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | S32
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | S32
| | QASYMM8_SIGNED | QSYMM8 | S32 | S32
|
|
| CLGEMMLowpMatrixMultiplyCore
|
|
| src0 | src1 | src2 | dst
|
|---|
| QASYMM8 | QASYMM8 | S32 | QASYMM8
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | QASYMM8
| | QASYMM8 | QSYMM8 | S32 | QASYMM8
| | QASYMM8 | QASYMM8 | S32 | S32
| | QASYMM8 | QSYMM8_PER_CHANNEL | S32 | S32
| | QASYMM8 | QSYMM8 | S32 | S32
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QSYMM8 | S32 | QASYMM8_SIGNED
| | QASYMM8_SIGNED | QASYMM8_SIGNED | S32 | S32
| | QASYMM8_SIGNED | QSYMM8_PER_CHANNEL | S32 | S32
| | QASYMM8_SIGNED | QSYMM8 | S32 | S32
|
|
| GEMMLowpOutputStage
| General Matrix Multiplication.
|
| NEGEMMLowpOutputStage
|
|
| src0 | src1 | dst
|
|---|
| S32 | S32 | QASYMM8
| | S32 | S32 | QASYMM8_SIGNED
| | S32 | S32 | QSYMM16
|
|
| CLGEMMLowpOutputStage
|
|
| src0 | src1 | dst
|
|---|
| S32 | S32 | QASYMM8
| | S32 | S32 | QASYMM8_SIGNED
| | S32 | S32 | QSYMM16
|
|
| GenerateProposalsLayer
| Function to generate proposals for a RPN (Region Proposal Network).
|
- ANEURALNETWORKS_GENERATE_PROPOSALS
| NEGenerateProposalsLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QSYMM8 | QSYMM16 | QASYMM8
|
|
| CLGenerateProposalsLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
| | QASYMM8 | QSYMM8 | QSYMM16 | QASYMM8
|
|
| InstanceNormalizationLayer
| Function to perform an Instance normalization on a given axis.
|
- ANEURALNETWORKS_INSTANCE_NORMALIZATION
| NEInstanceNormalizationLayer
|
|
|
| CLInstanceNormalizationLayer
|
|
|
| L2NormalizeLayer
| Function to perform a L2 normalization on a given axis.
|
- ANEURALNETWORKS_L2_NORMALIZATION
| NEL2NormalizeLayer
|
|
|
| CLL2NormalizeLayer
|
|
|
| Logical
| Function to perform: - Logical AND - Logical OR - Logical NOT
|
| NELogicalAnd
|
|
|
| NELogicalOr
|
|
|
| NELogicalNot
|
|
|
| LogicalAnd
| Function to perform Logical AND.
|
| CLLogicalAnd
|
|
|
| LogicalOr
| Function to perform Logical OR.
|
| CLLogicalOr
|
|
|
| LogicalNot
| Function to perform Logical NOT.
|
| CLLogicalNot
|
|
|
| LSTMLayer
| Function to perform a single time step in a Long Short-Term Memory (LSTM) layer.
|
| NELSTMLayer
|
|
| src0 - src13 | dst0 - dst3
|
|---|
| F16 | F16
| | F32 | F32
|
|
| CLLSTMLayer
|
|
| src0 - src13 | dst0 - dst3
|
|---|
| F16 | F16
| | F32 | F32
|
|
| LSTMLayerQuantized
| Function to perform quantized LSTM (Long Short-Term Memory)
|
- ANEURALNETWORKS_QUANTIZED_LSTM
- ANEURALNETWORKS_QUANTIZED_16BIT_LSTM
| NELSTMLayerQuantized
|
|
| src0 - src8 | src9 - src12 | src13 | src14 | dst0 | dst1
|
|---|
| QASYMM8 | S32 | QSYMM16 | QASYMM8 | QSYMM16 | QASYMM8
|
|
| CLLSTMLayerQuantized
|
|
| src0 - src8 | src9 - src12 | src13 | src14 | dst0 | dst1
|
|---|
| QASYMM8 | S32 | QSYMM16 | QASYMM8 | QSYMM16 | QASYMM8
|
|
| MaxUnpoolingLayer
| Function to perform MaxUnpooling.
|
| NEMaxUnpoolingLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| CLMaxUnpoolingLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| MeanStdDevNormalizationLayer
| Function to execute mean and standard deviation normalization.
|
| NEMeanStdDevNormalizationLayer
|
|
|
| CLMeanStdDevNormalizationLayer
|
|
|
| NormalizationLayer
| Function to compute normalization layer.
|
- ANEURALNETWORKS_LOCAL_RESPONSE_NORMALIZATION
| NENormalizationLayer
|
|
|
| CLNormalizationLayer
|
|
|
| PadLayer
| Function to pad a tensor.
|
- ANEURALNETWORKS_PAD
- ANEURALNETWORKS_PAD_V2
| NEPadLayer
|
|
|
| CLPadLayer
|
|
|
| Permute
| Function to transpose an ND tensor.
|
- ANEURALNETWORKS_TRANSPOSE
| NEPermute
|
|
|
| CLPermute
|
|
|
| PixelWiseMultiplication
| Function to perform a multiplication.
|
| NEPixelWiseMultiplication
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16 | QASYMM16
| | QSYMM16 | QSYMM16 | S32
| | U8 | U8 | U8
| | U8 | U8 | S16
| | U8 | S16 | S16
| | S16 | U8 | S16
| | S16 | S16 | S16
| | F16 | F16 | F16
| | F32 | S32 | F32
|
|
| CLPixelWiseMultiplication
|
|
| src0 | src1 | dst
|
|---|
| QASYMM8 | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED | QASYMM8_SIGNED
| | QSYMM16 | QSYMM16 | QASYMM16
| | QSYMM16 | QSYMM16 | S32
| | U8 | U8 | U8
| | U8 | U8 | S16
| | U8 | S16 | S16
| | S16 | U8 | S16
| | S16 | S16 | S16
| | F16 | F16 | F16
| | F32 | F32 | F32
| | S32 | S32 | S32
|
|
| PoolingLayer
| Function to perform pooling with the specified pooling operation.
|
- ANEURALNETWORKS_AVERAGE_POOL_2D
- ANEURALNETWORKS_L2_POOL_2D
- ANEURALNETWORKS_MAX_POOL_2D
| NEPoolingLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| CLPoolingLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| Pooling3dLayer
| Function to perform pooling 3D with the specified pooling operation.
|
| NEPooling3dLayer
|
|
| src | dst
|
|---|
| F16 | F16
| | F32 | F32
| | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
|
|
| CLPooling3dLayer
|
|
| src | dst
|
|---|
| F16 | F16
| | F32 | F32
| | QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
|
|
| PReluLayer
| Function to compute the activation layer with the PRELU activation function.
|
| NEPReluLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| CLPReluLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| PriorBoxLayer
| Function to compute prior boxes and clip.
|
| NEPriorBoxLayer
|
|
|
| CLPriorBoxLayer
|
|
|
| QLSTMLayer
| Function to perform quantized LSTM (Long Short-Term Memory).
|
- ANEURALNETWORKS_QUANTIZED_LSTM
- ANEURALNETWORKS_QUANTIZED_16BIT_LSTM
| NEQLSTMLayer
|
|
| src0 | src1 - src6 | src7 -src9 | src10 | src11 | dst0 | dst1 - dst2
|
|---|
| QASYMM8_SIGNED | QASYMM8 | S32 | QSYMM16 | QASYMM8_SIGNED | QSYMM16 | QASYMM8_SIGNED
|
|
| CLQLSTMLayer
|
|
| src0 | src1 - src6 | src7 -src9 | src10 | src11 | dst0 | dst1 - dst2
|
|---|
| QASYMM8_SIGNED | QASYMM8 | S32 | QSYMM16 | QASYMM8_SIGNED | QSYMM16 | QASYMM8_SIGNED
|
|
| QuantizationLayer
| Function to perform a quantization layer.
|
| NEQuantizationLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8, QASYMM8_SIGNED, QASYMM16
| | QASYMM8_SIGNED | QASYMM8, QASYMM8_SIGNED, QASYMM16
| | F16 | QASYMM8, QASYMM8_SIGNED, QASYMM16
| | F32 | QASYMM8, QASYMM8_SIGNED, QASYMM16
|
|
| CLQuantizationLayer
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8, QASYMM8_SIGNED, QASYMM16
| | QASYMM8_SIGNED | QASYMM8, QASYMM8_SIGNED, QASYMM16
| | F16 | QASYMM8, QASYMM8_SIGNED, QASYMM16
| | F32 | QASYMM8, QASYMM8_SIGNED, QASYMM16
|
|
| Range
| Function to generate a sequence of numbers starting from START and extending by increments of 'STEP' up to but not including 'END'.
|
| NERange
|
|
| dst
|
|---|
| U8
| | S8
| | U16
| | S16
| | U32
| | S32
| | F16
| | F32
|
|
| CLRange
|
|
| dst
|
|---|
| U8
| | S8
| | QASYMM8
| | U16
| | S16
| | U32
| | S32
| | F16
| | F32
|
|
| ReduceMean
| Function to perform reduce mean operation.
|
| NEReduceMean
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| CLReduceMean
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| ReductionOperation
| Function to perform reduce with the following operations - ARG_IDX_MAX: Index of the max value - ARG_IDX_MIN: Index of the min value - MEAN_SUM: Mean of sum - PROD: Product - SUM_SQUARE: Sum of squares - SUM: Sum - MIN: Min - MAX: Max
|
- ANEURALNETWORKS_REDUCE_ALL
- ANEURALNETWORKS_REDUCE_ANY
- ANEURALNETWORKS_REDUCE_MAX
- ANEURALNETWORKS_REDUCE_MIN
- ANEURALNETWORKS_REDUCE_PROD
- ANEURALNETWORKS_REDUCE_SUM
| NEReductionOperation
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
| | S32 | S32
|
|
| CLReductionOperation
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
| | S32 | S32
|
|
| ReorgLayer
| Performs a reorganization layer of input tensor to the output tensor.
|
| NEReorgLayer
|
|
|
| CLReorgLayer
|
|
|
| ReshapeLayer
| Function to reshape a tensor.
|
- ANEURALNETWORKS_RESHAPE
- ANEURALNETWORKS_SQUEEZE
| NEReshapeLayer
|
|
|
| CLReshapeLayer
|
|
|
| Reverse
| Function to reverse tensor according to axis.
|
| NEReverse
|
|
|
| CLReverse
|
|
|
| RNNLayer
| Function to perform recurrent neural network layer.
|
| NERNNLayer
|
|
| src0 | src1 | src2 | src3 | dst0 | dst1
|
|---|
| F16 | F16 | F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32 | F32 | F32
|
|
| CLRNNLayer
|
|
| src0 | src1 | src2 | src3 | dst0 | dst1
|
|---|
| F16 | F16 | F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32 | F32 | F32
|
|
| ROIAlignLayer
| Function to perform ROI alignment.
|
- ANEURALNETWORKS_ROI_ALIGN
| NEROIAlignLayer
|
|
| src0 | src1 | dst
|
|---|
| F16 | F16 | F16
| | F32 | F32 | F32
| | QASYMM8 | QASYMM16 | QASYMM8
| | QASYMM8_SIGNED | QASYMM16 | QASYMM8_SIGNED
|
|
| CLROIAlignLayer
|
|
| src0 | src1 | dst
|
|---|
| F16 | F16 | F16
| | F32 | F32 | F32
| | QASYMM8 | QASYMM16 | QASYMM8
| | QASYMM8_SIGNED | QASYMM16 | QASYMM8_SIGNED
|
|
| ROIPoolingLayer
| Function to perform ROI pooling.
|
- ANEURALNETWORKS_ROI_POOLING
| NEROIPoolingLayer
|
|
| src0 | src1 | dst
|
|---|
| F32 | U16 | F32
| | QASYMM8 | U16 | QASYMM8
|
|
| CLROIPoolingLayer
|
|
| src0 | src1 | dst
|
|---|
| F16 | U16 | F16
| | F32 | U16 | F32
| | QASYMM8 | U16 | QASYMM8
|
|
| Scale
| Function to resize a tensor using one of the following interpolation methods: - Bilinear - Nearest neighbor
|
- ANEURALNETWORKS_RESIZE_BILINEAR
- ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR
| NEScale
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
| | U8 | U8
| | S8 | S8
| | S16 | S16
|
|
| CLScale
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
| | U8 | U8
| | S16 | S16
|
|
| Select
| Function to select values from 2 tensors depending on an input tensor of booleans.
|
| NESelect
|
|
| src0 | src1 | src2 | dst
|
|---|
| U8 | All | All | All
|
|
| CLSelect
|
|
| src0 | src1 | src2 | dst
|
|---|
| U8 | All | All | All
|
|
| Slice
| Function to perform tensor slicing.
|
| NESlice
|
|
|
| CLSlice
|
|
|
| SoftmaxLayer
| Function to compute a SoftmaxLayer and a Log SoftmaxLayer.
|
- ANEURALNETWORKS_LOG_SOFTMAX
- ANEURALNETWORKS_SOFTMAX
| NESoftmaxLayerGeneric
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| CLSoftmaxLayerGeneric
|
|
| src | dst
|
|---|
| QASYMM8 | QASYMM8
| | QASYMM8_SIGNED | QASYMM8_SIGNED
| | F16 | F16
| | F32 | F32
|
|
| SpaceToBatchLayer
| Function to divide a tensor spatially.
|
- ANEURALNETWORKS_SPACE_TO_BATCH_ND
| NESpaceToBatchLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| All | S32 | S32 | All
|
|
| CLSpaceToBatchLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| All | S32 | S32 | All
|
|
| SpaceToDepthLayer
| Function to rearrange blocks of spatial data into depth.
|
- ANEURALNETWORKS_SPACE_TO_DEPTH
| NESpaceToDepthLayer
|
|
|
| CLSpaceToDepthLayer
|
|
|
| Split
| Function to split a tensor along a given axis.
|
| NESplit
|
|
|
| CLSplit
|
|
|
| StackLayer
| Function to stack tensors along an axis.
|
| NEStackLayer
|
|
|
| CLStackLayer
|
|
|
| StridedSlice
| Function to extract a strided slice of a tensor.
|
- ANEURALNETWORKS_STRIDED_SLICE
| NEStridedSlice
|
|
|
| CLStridedSlice
|
|
|
| Tile
| Function to construct a tensor by tiling a given tensor.
|
| NETile
|
|
|
| CLTile
|
|
|
| Transpose
| Function to transpose a 2D tensor.
|
- ANEURALNETWORKS_TRANSPOSE
| NETranspose
|
|
|
| CLTranspose
|
|
|
| Unstack
| Function to unpack a rank-R tensor into rank-(R-1) tensors.
|
| NEUnstack
|
|
|
| CLUnstack
|
|
|
| WinogradConvolutionLayer
| Function to do Winograd Convolution.
|
| NEWinogradConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
|
|
| CLWinogradConvolutionLayer
|
|
| src0 | src1 | src2 | dst
|
|---|
| F16 | F16 | F16 | F16
| | F32 | F32 | F32 | F32
|
|
*/
} // namespace arm_compute