1 /* 2 * Copyright (c) 2018-2020 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 
 */
#ifndef ARM_COMPUTE_CLLSTMLAYER_H
#define ARM_COMPUTE_CLLSTMLAYER_H

#include "arm_compute/runtime/IFunction.h"

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/common/LSTMParams.h"

#include <memory>

namespace arm_compute
{
// Forward declarations (kernels are held by std::unique_ptr so their full
// definitions are only needed in the implementation file).
class CLCompileContext;
class CLCopyKernel;
class CLMemsetKernel;
class CLTransposeKernel;
class ICLTensor;

/** This function performs a single time step in a Long Short-Term Memory (LSTM) layer.
 *
 */
class CLLSTMLayer : public IFunction
{
public:
    /** Default constructor */
    CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied */
    CLLSTMLayer(const CLLSTMLayer &) = delete;
    /** Prevent instances of this class from being copied */
    CLLSTMLayer &operator=(const CLLSTMLayer &) = delete;
    /** Prevent instances of this class from being moved */
    CLLSTMLayer(CLLSTMLayer &&) = delete;
    /** Prevent instances of this class from being moved */
    CLLSTMLayer &operator=(CLLSTMLayer &&) = delete;
    /** Default destructor */
    ~CLLSTMLayer();
    /** Initialize function's tensors.
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     *                                         NOTE(review): taken as non-const ICLTensor* although documented as [in] — confirm it is not modified in place.
     * @param[out] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG. Data type supported: Same as @p input.
     *                                         NOTE(review): CIFG omits the input gate, so the [num_units * 3] variant is normally the CIFG one — confirm the with/without wording.
     * @param[out] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
     *                                         Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole optimization:
     *                                         input_to_input_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                         recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         cell_to_input_weights      1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                         cell_to_forget_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_to_output_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         input_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input
     *                                         projection_weights         2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         projection_bias            1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
     *                                         input_layer_norm_weights   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         forget_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_layer_norm_weights    1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         output_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in]  cell_threshold              (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                         If set to 0.0f then clipping is disabled.
     * @param[in]  projection_threshold        (Optional) The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                         If set to 0.0f then clipping is disabled.
     */
    void configure(const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   const ICLTensor *output_state_in, ICLTensor *cell_state_in,
                   ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
    /** Initialize function's tensors.
     *
     * @param[in]  compile_context             The compile context to be used.
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG. Data type supported: Same as @p input.
     * @param[out] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
     *                                         Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole optimization:
     *                                         input_to_input_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                         recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         cell_to_input_weights      1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                         cell_to_forget_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_to_output_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         input_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input
     *                                         projection_weights         2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         projection_bias            1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
     *                                         input_layer_norm_weights   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         forget_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_layer_norm_weights    1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         output_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in]  cell_threshold              (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                         If set to 0.0f then clipping is disabled.
     * @param[in]  projection_threshold        (Optional) The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                         If set to 0.0f then clipping is disabled.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   const ICLTensor *output_state_in, ICLTensor *cell_state_in,
                   ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);

    /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayer
     *
     * @param[in] input                       Source tensor info. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in] input_to_forget_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] input_to_cell_weights       2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] input_to_output_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_cell_weights   2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] forget_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] cell_bias                   1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] output_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] output_state_in             2D weights tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_in               2D tensor info with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[in] scratch_buffer              2D tensor info with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG.
     *                                        Data type supported: Same as @p input.
     * @param[in] output_state_out            2D weights tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_out              2D tensor info with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[in] output                      Destination tensor info. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in] lstm_params                 Weights tensors info used in peephole optimization:
     *                                        input_to_input_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                        recurrent_to_input_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                        cell_to_input_weights      1D weights tensor info with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                        cell_to_forget_weights     1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        cell_to_output_weights     1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        input_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input
     *                                        projection_weights         2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                        projection_bias            1D weights tensor info with dimensions [output_size]. Data type supported: Same as @p input.
     *                                        input_layer_norm_weights   1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        forget_layer_norm_weights  1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        cell_layer_norm_weights    1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        output_layer_norm_weights  1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in] cell_threshold              (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                        If set to 0.0f then clipping is disabled.
     * @param[in] projection_threshold        (Optional) The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                        If set to 0.0f then clipping is disabled.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in,
                           const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output,
                           const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    // NOTE: member declaration order determines construction order; do not reorder.
    MemoryGroup                    _memory_group;
    // Gate computations (fully-connected, element-wise and activation stages per gate).
    CLFullyConnectedLayer          _fully_connected_input_gate;
    CLArithmeticAddition           _accum_input_gate1;
    CLArithmeticSubtraction        _subtract_input_gate;
    CLPixelWiseMultiplication      _pixelwise_mul_input_gate;
    CLActivationLayer              _activation_input_gate;
    CLFullyConnectedLayer          _fully_connected_forget_gate;
    CLArithmeticAddition           _accum_forget_gate1;
    CLPixelWiseMultiplication      _pixelwise_mul_forget_gate;
    CLActivationLayer              _activation_forget_gate;
    CLFullyConnectedLayer          _fully_connected_cell_state;
    CLGEMM                         _gemm_cell_state1;
    std::unique_ptr<CLTransposeKernel> _transpose_cell_state;
    CLArithmeticAddition           _accum_cell_state1;
    CLArithmeticAddition           _accum_cell_state2;
    CLPixelWiseMultiplication      _pixelwise_mul_cell_state1;
    CLActivationLayer              _activation_cell_state;
    CLActivationLayer              _cell_clip;
    CLPixelWiseMultiplication      _pixelwise_mul_cell_state2;
    CLFullyConnectedLayer          _fully_connected_output;
    CLPixelWiseMultiplication      _pixelwise_mul_output_state1;
    CLArithmeticAddition           _accum_output1;
    CLActivationLayer              _activation_output;
    CLActivationLayer              _activation_output_state;
    CLPixelWiseMultiplication      _pixelwise_mul_output_state2;
    CLFullyConnectedLayer          _fully_connected_output_state;
    CLActivationLayer              _projection_clip;
    std::unique_ptr<CLCopyKernel>  _copy_cell_state;
    std::unique_ptr<CLCopyKernel>  _copy_output;
    CLConcatenateLayer             _concat_scratch_buffer;
    CLConcatenateLayer             _concat_inputs_forget_gate;
    CLConcatenateLayer             _concat_weights_forget_gate;
    CLConcatenateLayer             _concat_weights_input_gate;
    CLConcatenateLayer             _concat_weights_output;
    std::unique_ptr<CLMemsetKernel> _ones_memset_kernel;
    // Layer-normalization stages (used only when the LSTM is layer-norm variant).
    CLMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
    CLPixelWiseMultiplication      _pixelwise_mul_input_gate_coeff;
    CLArithmeticAddition           _accum_input_gate_bias;
    CLMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
    CLPixelWiseMultiplication      _pixelwise_mul_forget_gate_coeff;
    CLArithmeticAddition           _accum_forget_gate_bias;
    CLMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
    CLPixelWiseMultiplication      _pixelwise_mul_cell_gate_coeff;
    CLArithmeticAddition           _accum_cell_gate_bias;
    CLMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
    CLPixelWiseMultiplication      _pixelwise_mul_output_gate_coeff;
    CLArithmeticAddition           _accum_output_gate_bias;
    // Intermediate tensors managed by _memory_group.
    CLTensor                       _input_gate_out1;
    CLTensor                       _input_gate_out2;
    CLTensor                       _input_gate_out3;
    CLTensor                       _input_gate_out4;
    CLTensor                       _forget_gate_out1;
    CLTensor                       _forget_gate_out2;
    CLTensor                       _forget_gate_out3;
    CLTensor                       _forget_gate_out4;
    CLTensor                       _forget_gate_out5;
    CLTensor                       _forget_gate_out6;
    CLTensor                       _cell_state_out1;
    CLTensor                       _cell_state_out2;
    CLTensor                       _cell_state_out3;
    CLTensor                       _cell_state_out4;
    CLTensor                       _cell_state_out5;
    CLTensor                       _output1;
    CLTensor                       _output2;
    CLTensor                       _output3;
    CLTensor                       _output4;
    CLTensor                       _cell_state_activation;
    CLTensor                       _output_state1;
    CLTensor                       _ones;
    CLTensor                       _input_layer_norm_out1;
    CLTensor                       _input_layer_norm_out2;
    CLTensor                       _forget_layer_norm_out1;
    CLTensor                       _forget_layer_norm_out2;
    CLTensor                       _cell_layer_norm_out1;
    CLTensor                       _cell_layer_norm_out2;
    CLTensor                       _output_layer_norm_out1;
    CLTensor                       _output_layer_norm_out2;
    // Configuration flags captured at configure() time.
    bool                           _run_peephole_opt;
    bool                           _run_cifg_opt;
    bool                           _perform_cell_clipping;
    bool                           _has_projection_weights;
    bool                           _perform_projection_clipping;
    bool                           _is_prepared;
    bool                           _is_layer_norm_lstm;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLLSTMLAYER_H */