/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NELSTMLAYER_H
#define ARM_COMPUTE_NELSTMLAYER_H

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
#include "arm_compute/runtime/common/LSTMParams.h"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Basic function to run @ref NELSTMLayer */
class NELSTMLayer : public IFunction
{
public:
    /** Default constructor */
    NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NELSTMLayer(const NELSTMLayer &) = delete;
    /** Prevent instances of this class from being copied (As this class contains pointers) */
    NELSTMLayer &operator=(const NELSTMLayer &) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NELSTMLayer(NELSTMLayer &&) = delete;
    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
    NELSTMLayer &operator=(NELSTMLayer &&) = delete;
    /** Default destructor */
    ~NELSTMLayer();
    /** Initialize function's tensors.
     *
     * Valid data layouts:
     * - All
     *
     * Valid data type configurations:
     * |src0 - src13 | dst0 - dst3 |
     * |:------------|:------------|
     * |F16          |F16          |
     * |F32          |F32          |
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG. Data type supported: Same as @p input.
     * @param[out] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
     *                                         Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole optimization:
     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in]  cell_threshold              The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0 then clipping is disabled.
     * @param[in]  projection_threshold        The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                         If set to 0.0 then clipping is disabled.
     */
    void configure(const ITensor *input,
                   const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
                   const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
                   const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
                   const ITensor *output_state_in, const ITensor *cell_state_in,
                   ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output,
                   const LSTMParams<ITensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
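
    /* Usage sketch (illustrative, not part of the library's documentation): the tensor
     * names below are placeholders, assumed to be allocated Tensor objects with F32 data
     * type and the shapes documented above, and lstm_params is left with no optional
     * peephole/projection/layer-norm weights set.
     *
     *     NELSTMLayer         lstm;
     *     LSTMParams<ITensor> lstm_params; // optional weight tensors left unset
     *     ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::TANH);
     *     lstm.configure(&input,
     *                    &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
     *                    &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
     *                    &forget_gate_bias, &cell_bias, &output_gate_bias,
     *                    &output_state_in, &cell_state_in,
     *                    &scratch_buffer, &output_state_out, &cell_state_out, &output,
     *                    lstm_params, act_info);
     *     lstm.run();
     */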

    /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer
     *
     * @param[in] input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in] input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[in] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG. Data type supported: Same as @p input.
     * @param[in] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[in] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
     *                                        Data types supported: Same as @p input.
     * @param[in] lstm_params                 Weights tensors used in peephole optimization:
     *                                        input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                        recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                        cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                        cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                        projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
     *                                        input_layer_norm_weights   (Optional) 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        forget_layer_norm_weights  (Optional) 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        cell_layer_norm_weights    (Optional) 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        output_layer_norm_weights  (Optional) 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in] cell_threshold              The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0 then clipping is disabled.
     * @param[in] projection_threshold        The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                        If set to 0.0 then clipping is disabled.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in,
                           const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output,
                           const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
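
    /* Illustrative note (not from the library's documentation): validate() takes
     * ITensorInfo descriptors, so a configuration can be checked before any tensor
     * memory is allocated. Assuming the placeholder Tensors from the configure()
     * sketch above and an LSTMParams<ITensorInfo> named lstm_params_info:
     *
     *     Status s = NELSTMLayer::validate(input.info(),
     *                                      input_to_forget_w.info(), input_to_cell_w.info(), input_to_output_w.info(),
     *                                      recurrent_to_forget_w.info(), recurrent_to_cell_w.info(), recurrent_to_output_w.info(),
     *                                      forget_gate_bias.info(), cell_bias.info(), output_gate_bias.info(),
     *                                      output_state_in.info(), cell_state_in.info(),
     *                                      scratch_buffer.info(), output_state_out.info(), cell_state_out.info(), output.info(),
     *                                      lstm_params_info, act_info);
     */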

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    MemoryGroup                    _memory_group;
    NEFullyConnectedLayer          _fully_connected_input_gate;
    NEArithmeticAddition           _accum_input_gate1;
    NEArithmeticSubtraction        _subtract_input_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_input_gate;
    NEActivationLayer              _activation_input_gate;
    NEFullyConnectedLayer          _fully_connected_forget_gate;
    NEArithmeticAddition           _accum_forget_gate1;
    NEPixelWiseMultiplication      _pixelwise_mul_forget_gate;
    NEActivationLayer              _activation_forget_gate;
    NEFullyConnectedLayer          _fully_connected_cell_state;
    NEGEMM                         _gemm_cell_state1;
    NETranspose                    _transpose_cell_state;
    NEArithmeticAddition           _accum_cell_state1;
    NEArithmeticAddition           _accum_cell_state2;
    NEPixelWiseMultiplication      _pixelwise_mul_cell_state1;
    NEActivationLayer              _activation_cell_state;
    NEActivationLayer              _cell_clip;
    NEPixelWiseMultiplication      _pixelwise_mul_cell_state2;
    NEFullyConnectedLayer          _fully_connected_output;
    NEPixelWiseMultiplication      _pixelwise_mul_output_state1;
    NEArithmeticAddition           _accum_output1;
    NEActivationLayer              _activation_output;
    NEActivationLayer              _activation_output_state;
    NEPixelWiseMultiplication      _pixelwise_mul_output_state2;
    NEFullyConnectedLayer          _fully_connected_output_state;
    NEActivationLayer              _projection_clip;
    NECopy                         _copy_cell_state;
    NECopy                         _copy_output;
    NEConcatenateLayer             _concat_scratch_buffer;
    NEConcatenateLayer             _concat_inputs_forget_gate;
    NEConcatenateLayer             _concat_weights_forget_gate;
    NEConcatenateLayer             _concat_weights_input_gate;
    NEConcatenateLayer             _concat_weights_output;
    NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_input_gate_coeff;
    NEArithmeticAddition           _accum_input_gate_bias;
    NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_forget_gate_coeff;
    NEArithmeticAddition           _accum_forget_gate_bias;
    NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_cell_gate_coeff;
    NEArithmeticAddition           _accum_cell_gate_bias;
    NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_output_gate_coeff;
    NEArithmeticAddition           _accum_output_gate_bias;
    Tensor                         _input_gate_out1;
    Tensor                         _input_gate_out2;
    Tensor                         _input_gate_out3;
    Tensor                         _input_gate_out4;
    Tensor                         _forget_gate_out1;
    Tensor                         _forget_gate_out2;
    Tensor                         _forget_gate_out3;
    Tensor                         _forget_gate_out4;
    Tensor                         _forget_gate_out5;
    Tensor                         _forget_gate_out6;
    Tensor                         _cell_state_out1;
    Tensor                         _cell_state_out2;
    Tensor                         _cell_state_out3;
    Tensor                         _cell_state_out4;
    Tensor                         _cell_state_out5;
    Tensor                         _output1;
    Tensor                         _output2;
    Tensor                         _output3;
    Tensor                         _output4;
    Tensor                         _cell_state_activation;
    Tensor                         _output_state1;
    Tensor                         _ones;
    Tensor                         _input_layer_norm_out1;
    Tensor                         _input_layer_norm_out2;
    Tensor                         _forget_layer_norm_out1;
    Tensor                         _forget_layer_norm_out2;
    Tensor                         _cell_layer_norm_out1;
    Tensor                         _cell_layer_norm_out2;
    Tensor                         _output_layer_norm_out1;
    Tensor                         _output_layer_norm_out2;
    bool                           _run_peephole_opt;
    bool                           _run_cifg_opt;
    bool                           _perform_cell_clipping;
    bool                           _has_projection_weights;
    bool                           _perform_projection_clipping;
    bool                           _is_prepared;
    bool                           _is_layer_norm_lstm;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NELSTMLAYER_H */