1 /* 2 * Copyright (c) 2019-2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef ARM_COMPUTE_CLLSTMLAYERQUANTIZED_H 25 #define ARM_COMPUTE_CLLSTMLAYERQUANTIZED_H 26 27 #include "arm_compute/core/Types.h" 28 #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" 29 #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h" 30 #include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h" 31 #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h" 32 #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h" 33 #include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h" 34 #include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h" 35 #include "arm_compute/runtime/CL/functions/CLQuantizationLayer.h" 36 #include "arm_compute/runtime/CL/functions/CLSlice.h" 37 #include "arm_compute/runtime/CL/functions/CLTranspose.h" 38 39 #include "arm_compute/runtime/common/LSTMParams.h" 40 41 namespace arm_compute 42 { 43 // Forward declarations 44 class ICLTensor; 45 46 /** Basic function to run @ref CLLSTMLayerQuantized 47 * 48 * This function calls the following CL functions/kernels: 49 * 50 * -# @ref CLGEMMLowpMatrixMultiplyCore Quantized matrix multiplication core. Accumulators are 32-bit integers 51 * -# @ref CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint Convert 32-bit integers into QSYMM16 52 * -# @ref CLTranspose Matrix transpose 53 * -# @ref CLConcatenateLayer Tensor concatenation 54 * -# @ref CLActivationLayer Activation functions (tanh and logistic) 55 * -# @ref CLArithmeticAddition Elementwise addition 56 * -# @ref CLPixelWiseMultiplication Elementwise multiplication 57 * -# @ref CLSlice Tensor slicing 58 * -# @ref CLDequantizationLayer Dequantize into float 59 * -# @ref CLQuantizationLayer Quantize from float 60 * */ 61 class CLLSTMLayerQuantized : public IFunction 62 { 63 public: 64 /** Default constructor */ 65 CLLSTMLayerQuantized(std::shared_ptr<IMemoryManager> memory_manager = nullptr); 66 /** Prevent instances of this class from being copied (As this class contains pointers) */ 67 CLLSTMLayerQuantized(const CLLSTMLayerQuantized &) = delete; 68 /** Default move constructor */ 69 CLLSTMLayerQuantized(CLLSTMLayerQuantized &&) = default; 70 /** Prevent instances of this class from being copied (As this class contains pointers) */ 71 CLLSTMLayerQuantized &operator=(const CLLSTMLayerQuantized &) = delete; 72 /** Default move assignment operator */ 73 CLLSTMLayerQuantized &operator=(CLLSTMLayerQuantized &&) = default; 74 /** Initialize function's tensors. 75 * 76 * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. 77 * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 78 * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 79 * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 80 * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 81 * @param[in] recurrent_to_input_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 82 * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 83 * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 84 * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 85 * @param[in] input_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 86 * @param[in] forget_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 87 * @param[in] cell_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 88 * @param[in] output_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 89 * @param[in] cell_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 90 * @param[in] output_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input. 91 * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 92 * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. 93 */ 94 void configure(const ICLTensor *input, 95 const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, 96 const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, 97 const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, 98 ICLTensor *cell_state_in, const ICLTensor *output_state_in, 99 ICLTensor *cell_state_out, ICLTensor *output_state_out); 100 /** Initialize function's tensors. 101 * 102 * @param[in] compile_context The compile context to be used. 103 * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. 104 * @param[in] input_to_input_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 105 * @param[in] input_to_forget_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 106 * @param[in] input_to_cell_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 107 * @param[in] input_to_output_weights 2D weights tensor with dimensions [input_size, output_size]. Data type supported: Same as @p input. 108 * @param[in] recurrent_to_input_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 109 * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 110 * @param[in] recurrent_to_cell_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 111 * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, output_size]. Data type supported: Same as @p input. 112 * @param[in] input_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 113 * @param[in] forget_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 114 * @param[in] cell_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 115 * @param[in] output_gate_bias 1D weights tensor with dimensions [output_size]. Data type supported: S32. 116 * @param[in] cell_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 117 * @param[in] output_state_in 2D tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input. 118 * @param[out] cell_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 119 * @param[out] output_state_out Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].Data types supported: Same as @p input. 120 */ 121 void configure(const CLCompileContext &compile_context, const ICLTensor *input, 122 const ICLTensor *input_to_input_weights, const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights, 123 const ICLTensor *recurrent_to_input_weights, const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights, 124 const ICLTensor *input_gate_bias, const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias, 125 ICLTensor *cell_state_in, const ICLTensor *output_state_in, 126 ICLTensor *cell_state_out, ICLTensor *output_state_out); 127 128 /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayerQuantized 129 * 130 * @param[in] input Source tensor info. Input is a 2D tensor info with dimensions [input_size, batch_size]. Data types supported: QASYMM8. 131 * @param[in] input_to_input_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 132 * @param[in] input_to_forget_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 133 * @param[in] input_to_cell_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 134 * @param[in] input_to_output_weights 2D weights tensor info with dimensions [input_size, output_size]. Data type supported: Same as @p input. 135 * @param[in] recurrent_to_input_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 136 * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 137 * @param[in] recurrent_to_cell_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 138 * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, output_size]. Data type supported: Same as @p input. 139 * @param[in] input_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 140 * @param[in] forget_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 141 * @param[in] cell_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 142 * @param[in] output_gate_bias 1D weights tensor info with dimensions [output_size]. Data type supported: S32. 143 * @param[in] cell_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 144 * @param[in] output_state_in 2D tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input. 145 * @param[out] cell_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size]. Data type supported: QSYMM16. 146 * @param[out] output_state_out Destination tensor info. Output is a 2D tensor info with dimensions [output_size, batch_size].Data types supported: Same as @p input. 147 * 148 * @return a status 149 */ 150 static Status validate(const ITensorInfo *input, 151 const ITensorInfo *input_to_input_weights, const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights, 152 const ITensorInfo *recurrent_to_input_weights, const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights, 153 const ITensorInfo *input_gate_bias, const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias, 154 const ITensorInfo *cell_state_in, const ITensorInfo *output_state_in, 155 const ITensorInfo *cell_state_out, const ITensorInfo *output_state_out); 156 157 // Inherited methods overridden: 158 void run() override; 159 void prepare() override; 160 161 private: 162 MemoryGroup _memory_group; 163 164 // Functions used 165 CLGEMMLowpMatrixMultiplyCore _gemmlowp; 166 CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint _output_stage; 167 CLTranspose _transpose_weights; 168 CLConcatenateLayer _concat_input_weights; 169 CLConcatenateLayer _concat_recurrent_weights; 170 CLConcatenateLayer _concat_weights; 171 CLConcatenateLayer _concat_inputs; 172 CLConcatenateLayer _concat_bias; 173 CLActivationLayer _sigmoid_forget_gate; 174 CLActivationLayer _sigmoid_input_gate; 175 CLActivationLayer _sigmoid_output_gate; 176 CLActivationLayer _tanh_modulation_gate; 177 CLActivationLayer _tanh_output_state; 178 CLArithmeticAddition _add_cell_state_tmps; 179 CLArithmeticAddition _add2; 180 CLPixelWiseMultiplication _mul_forget_gate_cell_state; 181 CLPixelWiseMultiplication _mul_input_gate_input_mod_gate; 182 CLPixelWiseMultiplication _mul_output_state_tmp_output_gate; 183 CLSlice _slice_input_tensor; 184 CLSlice _slice_forget_tensor; 185 CLSlice _slice_cell_tensor; 186 CLSlice _slice_output_tensor; 187 CLDequantizationLayer _dequantize; 188 CLQuantizationLayer _quantize; 189 190 // Tensor pointers 191 const ICLTensor *_input_to_input_weights; 192 const ICLTensor *_input_to_forget_weights; 193 const ICLTensor *_input_to_cell_weights; 194 const ICLTensor *_input_to_output_weights; 195 const ICLTensor *_recurrent_to_input_weights; 196 const ICLTensor *_recurrent_to_forget_weights; 197 const ICLTensor *_recurrent_to_cell_weights; 198 const ICLTensor *_recurrent_to_output_weights; 199 const ICLTensor *_input_gate_bias; 200 const ICLTensor *_forget_gate_bias; 201 const ICLTensor *_cell_bias; 202 const ICLTensor *_output_gate_bias; 203 204 // Temporary tensors 205 CLTensor _recurrent_weights; 206 CLTensor _input_weights; 207 CLTensor _weights; 208 CLTensor _input; 209 CLTensor _weights_transposed; 210 CLTensor _output_highp; 211 CLTensor _output_lowp; 212 CLTensor _bias; 213 CLTensor _forget_gate_input; 214 CLTensor _input_gate_input; 215 CLTensor _output_gate_input; 216 CLTensor _input_modulation_gate_input; 217 CLTensor _forget_gate_output; 218 CLTensor _input_gate_output; 219 CLTensor _output_gate_output; 220 CLTensor _input_modulation_gate_output; 221 CLTensor _cell_state_tmp1; 222 CLTensor _cell_state_tmp2; 223 CLTensor _output_state_tmp; 224 CLTensor _output_state_out_symm; 225 CLTensor _output_state_out_f32; 226 227 bool _is_prepared; 228 }; 229 } // namespace arm_compute 230 #endif /* ARM_COMPUTE_CLLSTMLAYERQUANTIZED_H */ 231