/*
 * Copyright (c) 2018-2020 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_CLLSTMLAYER_H
#define ARM_COMPUTE_CLLSTMLAYER_H

#include "arm_compute/runtime/IFunction.h"

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
#include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
#include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
#include "arm_compute/runtime/CL/functions/CLGEMM.h"
#include "arm_compute/runtime/CL/functions/CLMeanStdDevNormalizationLayer.h"
#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/common/LSTMParams.h"

#include <memory>

namespace arm_compute
{
class CLCompileContext;
class CLCopyKernel;
class CLMemsetKernel;
class CLTransposeKernel;
class ICLTensor;

/** This function performs a single time step in a Long Short-Term Memory (LSTM) layer.
 *
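 * A minimal usage sketch is shown below. It is illustrative only: the backend initialization,
 * the tensor names, the shapes and the data type are assumptions made for this example and are
 * not mandated by the interface; every tensor must be initialized with the shape documented
 * for configure() before it is used.
 * @code
 * // Initialize the OpenCL scheduler once per application (requires arm_compute/runtime/CL/CLScheduler.h).
 * CLScheduler::get().default_init();
 *
 * // Example dimensions, chosen arbitrarily for illustration.
 * const unsigned int input_size = 32, batch_size = 1;
 *
 * // Declare the tensors. Each one must be initialized with the shape documented for configure();
 * // only the source tensor is shown here to keep the sketch short.
 * CLTensor input, input_to_forget_weights, input_to_cell_weights, input_to_output_weights,
 *          recurrent_to_forget_weights, recurrent_to_cell_weights, recurrent_to_output_weights,
 *          forget_gate_bias, cell_bias, output_gate_bias, output_state_in, cell_state_in,
 *          scratch_buffer, output_state_out, cell_state_out, output;
 * input.allocator()->init(TensorInfo(TensorShape(input_size, batch_size), 1, DataType::F32));
 * // ... initialize the remaining weight, bias, state and output tensors in the same way ...
 *
 * // Optional weights (peephole, projection, layer normalization, input gate) are supplied
 * // through LSTMParams; see LSTMParams<T> for the corresponding setters.
 * CLLSTMLayer           lstm;
 * LSTMParams<ICLTensor> lstm_params;
 * lstm.configure(&input, &input_to_forget_weights, &input_to_cell_weights, &input_to_output_weights,
 *                &recurrent_to_forget_weights, &recurrent_to_cell_weights, &recurrent_to_output_weights,
 *                &forget_gate_bias, &cell_bias, &output_gate_bias,
 *                &output_state_in, &cell_state_in,
 *                &scratch_buffer, &output_state_out, &cell_state_out, &output,
 *                lstm_params, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH));
 *
 * // Allocate backing memory (repeat for every tensor), then run a single time step.
 * input.allocator()->allocate();
 * // ... allocate the remaining tensors ...
 * lstm.run();
 * @endcode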
 */
class CLLSTMLayer : public IFunction
{
public:
    /** Default constructor */
    CLLSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied */
    CLLSTMLayer(const CLLSTMLayer &) = delete;
    /** Prevent instances of this class from being copied */
    CLLSTMLayer &operator=(const CLLSTMLayer &) = delete;
    /** Prevent instances of this class from being moved */
    CLLSTMLayer(CLLSTMLayer &&) = delete;
    /** Prevent instances of this class from being moved */
    CLLSTMLayer &operator=(CLLSTMLayer &&) = delete;
    /** Default destructor */
    ~CLLSTMLayer();
    /** Initialize function's tensors.
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] without CIFG or [num_units * 3, batch_size] with CIFG. Data type supported: Same as @p input.
     * @param[out] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
     *                                         Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole optimization:
     *                                         input_to_input_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                         recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         cell_to_input_weights      1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                         cell_to_forget_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_to_output_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         input_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         projection_weights         2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         projection_bias            1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
     *                                         input_layer_norm_weights   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         forget_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_layer_norm_weights    1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         output_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in]  cell_threshold              (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                         If set to 0.0f then clipping is disabled.
     * @param[in]  projection_threshold        (Optional) The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                         If set to 0.0f then clipping is disabled.
     */
    void configure(const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   const ICLTensor *output_state_in, ICLTensor *cell_state_in,
                   ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
    /** Initialize function's tensors.
     *
     * @param[in]  compile_context             The compile context to be used.
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] without CIFG or [num_units * 3, batch_size] with CIFG. Data type supported: Same as @p input.
     * @param[out] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
     *                                         Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in peephole optimization:
     *                                         input_to_input_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                         recurrent_to_input_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         cell_to_input_weights      1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                         cell_to_forget_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_to_output_weights     1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         input_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         projection_weights         2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         projection_bias            1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
     *                                         input_layer_norm_weights   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         forget_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_layer_norm_weights    1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         output_layer_norm_weights  1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in]  cell_threshold              (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                         If set to 0.0f then clipping is disabled.
     * @param[in]  projection_threshold        (Optional) The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                         If set to 0.0f then clipping is disabled.
     */
    void configure(const CLCompileContext &compile_context, const ICLTensor *input,
                   const ICLTensor *input_to_forget_weights, const ICLTensor *input_to_cell_weights, const ICLTensor *input_to_output_weights,
                   const ICLTensor *recurrent_to_forget_weights, const ICLTensor *recurrent_to_cell_weights, const ICLTensor *recurrent_to_output_weights,
                   const ICLTensor *forget_gate_bias, const ICLTensor *cell_bias, const ICLTensor *output_gate_bias,
                   const ICLTensor *output_state_in, ICLTensor *cell_state_in,
                   ICLTensor *scratch_buffer, ICLTensor *output_state_out, ICLTensor *cell_state_out, ICLTensor *output,
                   const LSTMParams<ICLTensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);

    /** Static function to check if given info will lead to a valid configuration of @ref CLLSTMLayer
     *
     * @param[in] input                       Source tensor info. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in] input_to_forget_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] input_to_cell_weights       2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] input_to_output_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_forget_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_cell_weights   2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_output_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] forget_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] cell_bias                   1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] output_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] output_state_in             2D weights tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_in               2D tensor info with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[in] scratch_buffer              2D tensor info with dimensions [num_units * 4, batch_size] without CIFG or [num_units * 3, batch_size] with CIFG.
     *                                        Data type supported: Same as @p input.
     * @param[in] output_state_out            2D weights tensor info with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_out              2D tensor info with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[in] output                      Destination tensor info. Output is a 2D tensor with dimensions [output_size, batch_size]. Data types supported: Same as @p input.
     * @param[in] lstm_params                 Weights tensors info used in peephole optimization:
     *                                        input_to_input_weights     2D weights tensor info with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                        recurrent_to_input_weights 2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                        cell_to_input_weights      1D weights tensor info with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                        cell_to_forget_weights     1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        cell_to_output_weights     1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        input_gate_bias            1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        projection_weights         2D weights tensor info with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                        projection_bias            1D weights tensor info with dimensions [output_size]. Data type supported: Same as @p input.
     *                                        input_layer_norm_weights   1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        forget_layer_norm_weights  1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        cell_layer_norm_weights    1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        output_layer_norm_weights  1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in] cell_threshold              (Optional) The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip].
     *                                        If set to 0.0f then clipping is disabled.
     * @param[in] projection_threshold        (Optional) The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                        If set to 0.0f then clipping is disabled.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in,
                           const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output,
                           const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);

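    // A typical validate-before-configure sketch (illustrative only): it reuses the hypothetical
    // tensor names from the usage example in the class documentation above, lstm_params_info stands
    // for an LSTMParams<ITensorInfo> mirroring the runtime lstm_params, and activation_info is the
    // ActivationLayerInfo that would later be passed to configure().
    //
    //   const Status status = CLLSTMLayer::validate(input.info(),
    //                                               input_to_forget_weights.info(), input_to_cell_weights.info(), input_to_output_weights.info(),
    //                                               recurrent_to_forget_weights.info(), recurrent_to_cell_weights.info(), recurrent_to_output_weights.info(),
    //                                               forget_gate_bias.info(), cell_bias.info(), output_gate_bias.info(),
    //                                               output_state_in.info(), cell_state_in.info(),
    //                                               scratch_buffer.info(), output_state_out.info(), cell_state_out.info(), output.info(),
    //                                               lstm_params_info, activation_info);
    //   ARM_COMPUTE_ERROR_THROW_ON(status);
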
    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    MemoryGroup                        _memory_group;
    CLFullyConnectedLayer              _fully_connected_input_gate;
    CLArithmeticAddition               _accum_input_gate1;
    CLArithmeticSubtraction            _subtract_input_gate;
    CLPixelWiseMultiplication          _pixelwise_mul_input_gate;
    CLActivationLayer                  _activation_input_gate;
    CLFullyConnectedLayer              _fully_connected_forget_gate;
    CLArithmeticAddition               _accum_forget_gate1;
    CLPixelWiseMultiplication          _pixelwise_mul_forget_gate;
    CLActivationLayer                  _activation_forget_gate;
    CLFullyConnectedLayer              _fully_connected_cell_state;
    CLGEMM                             _gemm_cell_state1;
    std::unique_ptr<CLTransposeKernel> _transpose_cell_state;
    CLArithmeticAddition               _accum_cell_state1;
    CLArithmeticAddition               _accum_cell_state2;
    CLPixelWiseMultiplication          _pixelwise_mul_cell_state1;
    CLActivationLayer                  _activation_cell_state;
    CLActivationLayer                  _cell_clip;
    CLPixelWiseMultiplication          _pixelwise_mul_cell_state2;
    CLFullyConnectedLayer              _fully_connected_output;
    CLPixelWiseMultiplication          _pixelwise_mul_output_state1;
    CLArithmeticAddition               _accum_output1;
    CLActivationLayer                  _activation_output;
    CLActivationLayer                  _activation_output_state;
    CLPixelWiseMultiplication          _pixelwise_mul_output_state2;
    CLFullyConnectedLayer              _fully_connected_output_state;
    CLActivationLayer                  _projection_clip;
    std::unique_ptr<CLCopyKernel>      _copy_cell_state;
    std::unique_ptr<CLCopyKernel>      _copy_output;
    CLConcatenateLayer                 _concat_scratch_buffer;
    CLConcatenateLayer                 _concat_inputs_forget_gate;
    CLConcatenateLayer                 _concat_weights_forget_gate;
    CLConcatenateLayer                 _concat_weights_input_gate;
    CLConcatenateLayer                 _concat_weights_output;
    std::unique_ptr<CLMemsetKernel>    _ones_memset_kernel;
    CLMeanStdDevNormalizationLayer     _mean_std_norm_input_gate;
    CLPixelWiseMultiplication          _pixelwise_mul_input_gate_coeff;
    CLArithmeticAddition               _accum_input_gate_bias;
    CLMeanStdDevNormalizationLayer     _mean_std_norm_forget_gate;
    CLPixelWiseMultiplication          _pixelwise_mul_forget_gate_coeff;
    CLArithmeticAddition               _accum_forget_gate_bias;
    CLMeanStdDevNormalizationLayer     _mean_std_norm_cell_gate;
    CLPixelWiseMultiplication          _pixelwise_mul_cell_gate_coeff;
    CLArithmeticAddition               _accum_cell_gate_bias;
    CLMeanStdDevNormalizationLayer     _mean_std_norm_output_gate;
    CLPixelWiseMultiplication          _pixelwise_mul_output_gate_coeff;
    CLArithmeticAddition               _accum_output_gate_bias;
    CLTensor                           _input_gate_out1;
    CLTensor                           _input_gate_out2;
    CLTensor                           _input_gate_out3;
    CLTensor                           _input_gate_out4;
    CLTensor                           _forget_gate_out1;
    CLTensor                           _forget_gate_out2;
    CLTensor                           _forget_gate_out3;
    CLTensor                           _forget_gate_out4;
    CLTensor                           _forget_gate_out5;
    CLTensor                           _forget_gate_out6;
    CLTensor                           _cell_state_out1;
    CLTensor                           _cell_state_out2;
    CLTensor                           _cell_state_out3;
    CLTensor                           _cell_state_out4;
    CLTensor                           _cell_state_out5;
    CLTensor                           _output1;
    CLTensor                           _output2;
    CLTensor                           _output3;
    CLTensor                           _output4;
    CLTensor                           _cell_state_activation;
    CLTensor                           _output_state1;
    CLTensor                           _ones;
    CLTensor                           _input_layer_norm_out1;
    CLTensor                           _input_layer_norm_out2;
    CLTensor                           _forget_layer_norm_out1;
    CLTensor                           _forget_layer_norm_out2;
    CLTensor                           _cell_layer_norm_out1;
    CLTensor                           _cell_layer_norm_out2;
    CLTensor                           _output_layer_norm_out1;
    CLTensor                           _output_layer_norm_out2;
    bool                               _run_peephole_opt;
    bool                               _run_cifg_opt;
    bool                               _perform_cell_clipping;
    bool                               _has_projection_weights;
    bool                               _perform_projection_clipping;
    bool                               _is_prepared;
    bool                               _is_layer_norm_lstm;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLLSTMLAYER_H */