1 /* 2 * Copyright (c) 2019-2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_NELSTMLAYERQUANTIZED_H 25 #define ARM_COMPUTE_NELSTMLAYERQUANTIZED_H 26 27 #include "arm_compute/core/Types.h" 28 #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" 29 #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" 30 #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" 31 #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" 32 #include "arm_compute/runtime/NEON/functions/NEElementwiseOperations.h" 33 #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" 34 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" 35 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" 36 #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" 37 #include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" 38 #include "arm_compute/runtime/NEON/functions/NESlice.h" 39 #include "arm_compute/runtime/NEON/functions/NETranspose.h" 40 41 #include "arm_compute/runtime/common/LSTMParams.h" 42 43 namespace arm_compute 44 { 45 // Forward declarations 46 class ITensor; 47 48 /** Basic function to run @ref NELSTMLayerQuantized 49 * 50 * This function calls the following NEON functions/kernels: 51 * 52 * -# @ref NEGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers 53 * -# @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 54 * -# @ref NETranspose Matrix transpose 55 * -# @ref NEConcatenateLayer Tensor concatenation 56 * -# @ref NEActivationLayer Activation functions (tanh and logistic) 57 * -# @ref NEArithmeticAddition Elementwise addition 58 * -# @ref NEPixelWiseMultiplication Elementwise multiplication 59 * -# @ref NESlice Tensor slicing 60 * -# @ref NEDequantizationLayer Dequantize into float 61 * -# @ref NEQuantizationLayer Quantize from float 62 * */ 63 class NELSTMLayerQuantized : public IFunction 64 { 65 public: 66 /** Default constructor */ 67 NELSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr); 68 /** Prevent instances of this class from being copied (As this class contains pointers) */ 69 NELSTMLayerQuantized(const NELSTMLayerQuantized &) = delete; 70 /** Prevent instances of this class from being moved (As this class contains pointers) */ 71 NELSTMLayerQuantized(NELSTMLayerQuantized &&) = delete; 72 /** Prevent instances of this class from being copied (As this class contains pointers) */ 73 NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete; 74 /** Prevent instances of this class from being moved (As this class contains pointers) */ 75 NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = delete; 76 /** Default destructor */ 77 ~NELSTMLayerQuantized(); 78 /** Initialize function's tensors. 79 * 80 * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. 81 * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 82 * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 83 * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 84 * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 85 * @param[in] recurrent_to_input_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 86 * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 87 * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 88 * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 89 * @param[in] input_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 90 * @param[in] forget_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 91 * @param[in] cell_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 92 * @param[in] output_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 93 * @param[in] cell_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 94 * @param[in] output_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input. 95 * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 96 * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. 97 */ 98 void configure(const ITensor *input, 99 const ITensor *input_to_input_weights, const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights, 100 const ITensor *recurrent_to_input_weights, const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights, 101 const ITensor *input_gate_bias, const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias, 102 ITensor *cell_state_in, const ITensor *output_state_in, 103 ITensor *cell_state_out, ITensor *output_state_out); 104 105 /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer 106 * 107 * @param[in] input Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8. 108 * @param[in] input_to_input_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 109 * @param[in] input_to_forget_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 110 * @param[in] input_to_cell_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 111 * @param[in] input_to_output_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 112 * @param[in] recurrent_to_input_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 113 * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 114 * @param[in] recurrent_to_cell_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 115 * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 116 * @param[in] input_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 117 * @param[in] forget_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 118 * @param[in] cell_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 119 * @param[in] output_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 120 * @param[in] cell_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 121 * @param[in] output_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input. 122 * @param[out] cell_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 123 * @param[out] output_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size].Data types supported: Same as @p input. 124 * 125 * @return a status 126 */ 127 static Status validate(const ITensorInfo *input, 128 const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, 129 const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, 130 const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, 131 const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, 132 const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out); 133 134 // Inherited methods overridden: 135 void run() override; 136 void prepare() override; 137 138 private: 139 MemoryGroup _memory_group; 140 141 // Functions used 142 NEGEMMLowpMatrixMultiplyCore _gemmlowp; 143 NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage; 144 NETranspose _transpose_weights; 145 NEConcatenateLayer _concat_input_weights; 146 NEConcatenateLayer _concat_recurrent_weights; 147 NEConcatenateLayer _concat_weights; 148 NEConcatenateLayer _concat_inputs; 149 NEConcatenateLayer _concat_bias; 150 NEActivationLayer _sigmoid_forget_gate; 151 NEActivationLayer _sigmoid_input_gate; 152 NEActivationLayer _sigmoid_output_gate; 153 NEActivationLayer _tanh_modulation_gate; 154 NEActivationLayer _tanh_output_state; 155 NEArithmeticAddition _add1; 156 NEArithmeticAddition _add2; 157 NEPixelWiseMultiplication _mul1; 158 NEPixelWiseMultiplication _mul2; 159 NEPixelWiseMultiplication _mul3; 160 NESlice _slice_input_tensor; 161 NESlice _slice_forget_tensor; 162 NESlice _slice_cell_tensor; 163 NESlice _slice_output_tensor; 164 NEDequantizationLayer _dequantize; 165 NEQuantizationLayer _quantize; 166 167 // Tensor pointers 168 const ITensor *_input_to_input_weights; 169 const ITensor *_input_to_forget_weights; 170 const ITensor *_input_to_cell_weights; 171 const ITensor *_input_to_output_weights; 172 const ITensor *_recurrent_to_input_weights; 173 const ITensor *_recurrent_to_forget_weights; 174 const ITensor *_recurrent_to_cell_weights; 175 const ITensor *_recurrent_to_output_weights; 176 const ITensor *_input_gate_bias; 177 const ITensor *_forget_gate_bias; 178 const ITensor *_cell_bias; 179 const ITensor *_output_gate_bias; 180 181 // Temporary tensors 182 Tensor _recurrent_weights; 183 Tensor _input_weights; 184 Tensor _weights; 185 Tensor _input; 186 Tensor _weights_transposed; 187 Tensor _output_highp; 188 Tensor _output_lowp; 189 Tensor _bias; 190 Tensor _forget_gate_input; 191 Tensor _input_gate_input; 192 Tensor _output_gate_input; 193 Tensor _input_modulation_gate_input; 194 Tensor _forget_gate_output; 195 Tensor _input_gate_output; 196 Tensor _output_gate_output; 197 Tensor _input_modulation_gate_output; 198 Tensor _cell_state1; 199 Tensor _cell_state2; 200 Tensor _output_state_tmp; 201 Tensor _output_state_out_symm; 202 Tensor _output_state_out_f32; 203 204 bool _is_prepared; 205 }; 206 } // namespace arm_compute 207 #endif /* ARM_COMPUTE_NELSTMLAYERQUANTIZED_H */ 208