/*
 * Copyright (c) 2018-2021 Arm Limited.
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef ARM_COMPUTE_NELSTMLAYER_H
#define ARM_COMPUTE_NELSTMLAYER_H

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h"
#include "arm_compute/runtime/NEON/functions/NECopy.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h"
#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
#include "arm_compute/runtime/NEON/functions/NETranspose.h"
#include "arm_compute/runtime/common/LSTMParams.h"

namespace arm_compute
{
// Forward declarations
class ITensor;

/** Basic function to run @ref NELSTMLayer */
class NELSTMLayer : public IFunction
{
public:
    /** Default constructor */
    NELSTMLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
    /** Prevent instances of this class from being copied (as this class contains pointers) */
    NELSTMLayer(const NELSTMLayer &) = delete;
    /** Prevent instances of this class from being copied (as this class contains pointers) */
    NELSTMLayer &operator=(const NELSTMLayer &) = delete;
    /** Prevent instances of this class from being moved (as this class contains non-movable objects) */
    NELSTMLayer(NELSTMLayer &&) = delete;
    /** Prevent instances of this class from being moved (as this class contains non-movable objects) */
    NELSTMLayer &operator=(NELSTMLayer &&) = delete;
    /** Default destructor */
    ~NELSTMLayer();
    /** Initialize the function's tensors.
     *
     * Valid data layouts:
     * - All
     *
     * Valid data type configurations:
     * |src0 - src13 | dst0 - dst3 |
     * |:------------|:------------|
     * |F16          |F16          |
     * |F32          |F32          |
     *
     * @param[in]  input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in]  input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in]  forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in]  cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG. Data type supported: Same as @p input.
     * @param[out] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[out] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[out] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
     *                                         Data types supported: Same as @p input.
     * @param[in]  lstm_params                 Weights tensors used in the peephole, CIFG, projection and layer normalization optimizations:
     *                                         input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                         recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                         cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                         projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
     *                                         input_layer_norm_weights   (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         forget_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         cell_layer_norm_weights    (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                         output_layer_norm_weights  (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in]  activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in]  cell_threshold              The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0 then clipping is disabled.
     * @param[in]  projection_threshold        The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                         If set to 0.0 then clipping is disabled.
     */
    void configure(const ITensor *input,
                   const ITensor *input_to_forget_weights, const ITensor *input_to_cell_weights, const ITensor *input_to_output_weights,
                   const ITensor *recurrent_to_forget_weights, const ITensor *recurrent_to_cell_weights, const ITensor *recurrent_to_output_weights,
                   const ITensor *forget_gate_bias, const ITensor *cell_bias, const ITensor *output_gate_bias,
                   const ITensor *output_state_in, const ITensor *cell_state_in,
                   ITensor *scratch_buffer, ITensor *output_state_out, ITensor *cell_state_out, ITensor *output,
                   const LSTMParams<ITensor> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
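    /* Illustrative usage (a minimal sketch, not taken from the library's documentation or tests):
     * a plain non-CIFG, non-peephole, non-projection configuration could look like the following,
     * assuming every tensor has already been created and allocated with the shapes documented
     * above. All tensor names below are hypothetical.
     *
     *   NELSTMLayer         lstm;
     *   LSTMParams<ITensor> params; // no optional tensors set
     *   lstm.configure(&input, &input_to_forget_w, &input_to_cell_w, &input_to_output_w,
     *                  &recurrent_to_forget_w, &recurrent_to_cell_w, &recurrent_to_output_w,
     *                  &forget_gate_bias, &cell_bias, &output_gate_bias,
     *                  &output_state_in, &cell_state_in,
     *                  &scratch_buffer, &output_state_out, &cell_state_out, &output,
     *                  params, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH));
     *   lstm.run();
     */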

    /** Static function to check if given info will lead to a valid configuration of @ref NELSTMLayer
     *
     * @param[in] input                       Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32.
     * @param[in] input_to_forget_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] input_to_cell_weights       2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] input_to_output_weights     2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_forget_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_cell_weights   2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] recurrent_to_output_weights 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     * @param[in] forget_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] cell_bias                   1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] output_gate_bias            1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] output_state_in             2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_in               2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[in] scratch_buffer              2D tensor with dimensions [num_units * 4, batch_size] with CIFG or [num_units * 3, batch_size] without CIFG. Data type supported: Same as @p input.
     * @param[in] output_state_out            2D weights tensor with dimensions [output_size, batch_size]. Data type supported: Same as @p input.
     * @param[in] cell_state_out              2D tensor with dimensions [num_units, batch_size]. Data type supported: Same as @p input.
     * @param[in] output                      Destination tensor. Output is a 2D tensor with dimensions [output_size, batch_size].
     *                                        Data types supported: Same as @p input.
     * @param[in] lstm_params                 Weights tensors used in the peephole, CIFG, projection and layer normalization optimizations:
     *                                        input_to_input_weights     (Optional) 2D weights tensor with dimensions [input_size, num_units]. Data type supported: Same as @p input.
     *                                        recurrent_to_input_weights (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                        cell_to_input_weights      (Optional) 1D weights tensor with dimensions [num_units]. Can be nullptr. Data type supported: Same as @p input.
     *                                        cell_to_forget_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        cell_to_output_weights     (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        input_gate_bias            (Optional) 1D weights tensor with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        projection_weights         (Optional) 2D weights tensor with dimensions [output_size, num_units]. Data type supported: Same as @p input.
     *                                        projection_bias            (Optional) 1D weights tensor with dimensions [output_size]. Data type supported: Same as @p input.
     *                                        input_layer_norm_weights   (Optional) 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        forget_layer_norm_weights  (Optional) 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        cell_layer_norm_weights    (Optional) 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     *                                        output_layer_norm_weights  (Optional) 1D weights tensor info with dimensions [num_units]. Data type supported: Same as @p input.
     * @param[in] activation_info             Contains activation information described in @ref ActivationLayerInfo.
     * @param[in] cell_threshold              The clipping threshold for the cell state, such that values are bound within [-cell_clip, cell_clip]. If set to 0.0 then clipping is disabled.
     * @param[in] projection_threshold        The clipping threshold for the output from the projection layer, such that values are bound within [-proj_clip, proj_clip].
     *                                        If set to 0.0 then clipping is disabled.
     *
     * @return a status
     */
    static Status validate(const ITensorInfo *input,
                           const ITensorInfo *input_to_forget_weights, const ITensorInfo *input_to_cell_weights, const ITensorInfo *input_to_output_weights,
                           const ITensorInfo *recurrent_to_forget_weights, const ITensorInfo *recurrent_to_cell_weights, const ITensorInfo *recurrent_to_output_weights,
                           const ITensorInfo *forget_gate_bias, const ITensorInfo *cell_bias, const ITensorInfo *output_gate_bias,
                           const ITensorInfo *output_state_in, const ITensorInfo *cell_state_in,
                           const ITensorInfo *scratch_buffer, const ITensorInfo *output_state_out, const ITensorInfo *cell_state_out, const ITensorInfo *output,
                           const LSTMParams<ITensorInfo> &lstm_params, const ActivationLayerInfo &activation_info, float cell_threshold = 0.f, float projection_threshold = 0.f);
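    /* Illustrative sketch (an assumed workflow, not taken from the library's documentation):
     * validate() takes ITensorInfo objects, so a configuration can be checked before any tensor
     * is allocated, typically by passing the info() of the tensors that would later be given to
     * configure(). Variable names below (including params_info) are hypothetical.
     *
     *   const Status s = NELSTMLayer::validate(input.info(), input_to_forget_w.info(), input_to_cell_w.info(), input_to_output_w.info(),
     *                                          recurrent_to_forget_w.info(), recurrent_to_cell_w.info(), recurrent_to_output_w.info(),
     *                                          forget_gate_bias.info(), cell_bias.info(), output_gate_bias.info(),
     *                                          output_state_in.info(), cell_state_in.info(),
     *                                          scratch_buffer.info(), output_state_out.info(), cell_state_out.info(), output.info(),
     *                                          params_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH));
     *   ARM_COMPUTE_ERROR_THROW_ON(s);
     */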

    // Inherited methods overridden:
    void run() override;
    void prepare() override;

private:
    MemoryGroup                    _memory_group;
    NEFullyConnectedLayer          _fully_connected_input_gate;
    NEArithmeticAddition           _accum_input_gate1;
    NEArithmeticSubtraction        _subtract_input_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_input_gate;
    NEActivationLayer              _activation_input_gate;
    NEFullyConnectedLayer          _fully_connected_forget_gate;
    NEArithmeticAddition           _accum_forget_gate1;
    NEPixelWiseMultiplication      _pixelwise_mul_forget_gate;
    NEActivationLayer              _activation_forget_gate;
    NEFullyConnectedLayer          _fully_connected_cell_state;
    NEGEMM                         _gemm_cell_state1;
    NETranspose                    _transpose_cell_state;
    NEArithmeticAddition           _accum_cell_state1;
    NEArithmeticAddition           _accum_cell_state2;
    NEPixelWiseMultiplication      _pixelwise_mul_cell_state1;
    NEActivationLayer              _activation_cell_state;
    NEActivationLayer              _cell_clip;
    NEPixelWiseMultiplication      _pixelwise_mul_cell_state2;
    NEFullyConnectedLayer          _fully_connected_output;
    NEPixelWiseMultiplication      _pixelwise_mul_output_state1;
    NEArithmeticAddition           _accum_output1;
    NEActivationLayer              _activation_output;
    NEActivationLayer              _activation_output_state;
    NEPixelWiseMultiplication      _pixelwise_mul_output_state2;
    NEFullyConnectedLayer          _fully_connected_output_state;
    NEActivationLayer              _projection_clip;
    NECopy                         _copy_cell_state;
    NECopy                         _copy_output;
    NEConcatenateLayer             _concat_scratch_buffer;
    NEConcatenateLayer             _concat_inputs_forget_gate;
    NEConcatenateLayer             _concat_weights_forget_gate;
    NEConcatenateLayer             _concat_weights_input_gate;
    NEConcatenateLayer             _concat_weights_output;
    NEMeanStdDevNormalizationLayer _mean_std_norm_input_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_input_gate_coeff;
    NEArithmeticAddition           _accum_input_gate_bias;
    NEMeanStdDevNormalizationLayer _mean_std_norm_forget_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_forget_gate_coeff;
    NEArithmeticAddition           _accum_forget_gate_bias;
    NEMeanStdDevNormalizationLayer _mean_std_norm_cell_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_cell_gate_coeff;
    NEArithmeticAddition           _accum_cell_gate_bias;
    NEMeanStdDevNormalizationLayer _mean_std_norm_output_gate;
    NEPixelWiseMultiplication      _pixelwise_mul_output_gate_coeff;
    NEArithmeticAddition           _accum_output_gate_bias;
    Tensor                         _input_gate_out1;
    Tensor                         _input_gate_out2;
    Tensor                         _input_gate_out3;
    Tensor                         _input_gate_out4;
    Tensor                         _forget_gate_out1;
    Tensor                         _forget_gate_out2;
    Tensor                         _forget_gate_out3;
    Tensor                         _forget_gate_out4;
    Tensor                         _forget_gate_out5;
    Tensor                         _forget_gate_out6;
    Tensor                         _cell_state_out1;
    Tensor                         _cell_state_out2;
    Tensor                         _cell_state_out3;
    Tensor                         _cell_state_out4;
    Tensor                         _cell_state_out5;
    Tensor                         _output1;
    Tensor                         _output2;
    Tensor                         _output3;
    Tensor                         _output4;
    Tensor                         _cell_state_activation;
    Tensor                         _output_state1;
    Tensor                         _ones;
    Tensor                         _input_layer_norm_out1;
    Tensor                         _input_layer_norm_out2;
    Tensor                         _forget_layer_norm_out1;
    Tensor                         _forget_layer_norm_out2;
    Tensor                         _cell_layer_norm_out1;
    Tensor                         _cell_layer_norm_out2;
    Tensor                         _output_layer_norm_out1;
    Tensor                         _output_layer_norm_out2;
    bool                           _run_peephole_opt;
    bool                           _run_cifg_opt;
    bool                           _perform_cell_clipping;
    bool                           _has_projection_weights;
    bool                           _perform_projection_clipping;
    bool                           _is_prepared;
    bool                           _is_layer_norm_lstm;
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_NELSTMLAYER_H */