//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LstmTestImpl.hpp"

#include <QuantizeHelper.hpp>

#include <armnn/utility/NumericCast.hpp>

#include <backendsCommon/CpuTensorHandle.hpp>

#include <backendsCommon/test/TensorCopyUtils.hpp>
#include <backendsCommon/test/WorkloadTestUtils.hpp>

#include <reference/workloads/Decoders.hpp>
#include <reference/workloads/Encoders.hpp>
#include <reference/workloads/LstmUtils.hpp>

#include <test/TensorHelpers.hpp>

#include <boost/multi_array.hpp>

namespace
{

28 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LstmUtilsVectorBatchVectorAddTestImpl(boost::multi_array<float,1> & vec,boost::multi_array<float,2> & batchVec,uint32_t vSize,uint32_t nBatch,boost::multi_array<float,2> & expectedOutput)29 void LstmUtilsVectorBatchVectorAddTestImpl(
30         boost::multi_array<float, 1>& vec,
31         boost::multi_array<float, 2>& batchVec,
32         uint32_t vSize,
33         uint32_t nBatch,
34         boost::multi_array<float, 2>& expectedOutput )
35 {
36     float qScale = 0.0f;
37     int32_t qOffset = 0;
38     armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType,  qScale, qOffset );
39 
40     // Make encoder and decoder
41     std::unique_ptr<armnn::Decoder<float>> vecDecoder = armnn::MakeDecoder<float>(tensorInfo, vec.data());
42     std::unique_ptr<armnn::Decoder<float>> batchVecDecoder = armnn::MakeDecoder<float>(tensorInfo, batchVec.data());
43     std::unique_ptr<armnn::Encoder<float>> batchVecEncoder = armnn::MakeEncoder<float>(tensorInfo, batchVec.data());
44 
45     VectorBatchVectorAdd(*vecDecoder, vSize, *batchVecDecoder, nBatch, *batchVecEncoder);
46 
47     // check shape and compare values
48     BOOST_TEST(CompareTensors(batchVec, expectedOutput));
49 
50     // check if iterator is back at start position
51     batchVecEncoder->Set(1.0f);
52     BOOST_TEST(batchVec[0][0] == 1.0f);
53 }
54 
55 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LstmUtilsZeroVectorTestImpl(boost::multi_array<float,1> & input,uint32_t vSize,boost::multi_array<float,1> & expectedOutput)56 void LstmUtilsZeroVectorTestImpl(
57         boost::multi_array<float, 1>& input,
58         uint32_t vSize,
59         boost::multi_array<float, 1>& expectedOutput)
60 {
61     float qScale = 0.0f;
62     int32_t qOffset = 0;
63 
64     armnn::TensorInfo tensorInfo({vSize}, ArmnnType,  qScale, qOffset );
65 
66     // Make encoder for input
67     std::unique_ptr<armnn::Encoder<float>> outputEncoder = armnn::MakeEncoder<float>(tensorInfo, input.data());
68 
69     // call ZeroVector
70     ZeroVector(*outputEncoder, vSize);
71 
72     // check shape and compare values
73     BOOST_TEST(CompareTensors(input, expectedOutput));
74 
75     // check if iterator is back at start position
76     outputEncoder->Set(1.0f);
77     BOOST_TEST(input[0] == 1.0f);
78 
79 }
80 
81 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LstmUtilsMeanStddevNormalizationTestImpl(boost::multi_array<float,2> & input,uint32_t vSize,uint32_t nBatch,boost::multi_array<float,2> & expectedOutput)82 void LstmUtilsMeanStddevNormalizationTestImpl(
83         boost::multi_array<float, 2>& input,
84         uint32_t vSize,
85         uint32_t nBatch,
86         boost::multi_array<float, 2>& expectedOutput)
87 {
88     float qScale = 0.0f;
89     int32_t qOffset = 0;
90     armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType,  qScale, qOffset );
91 
92     // Make encoder and decoder for input
93     std::unique_ptr<armnn::Decoder<float>> inputDecoder = armnn::MakeDecoder<float>(tensorInfo, input.data());
94     std::unique_ptr<armnn::Encoder<float>> outputEncoder = armnn::MakeEncoder<float>(tensorInfo, input.data());
95 
96     MeanStddevNormalization(*inputDecoder, *outputEncoder, vSize, nBatch, 1e-8f);
97 
98     // check shape and compare values
99     BOOST_TEST(CompareTensors(input, expectedOutput));
100 
101     // check if iterator is back at start position
102     outputEncoder->Set(1.0f);
103     BOOST_TEST(input[0][0] == 1.0f);
104 }
105 
106 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LstmUtilsVectorBatchVectorCwiseProductTestImpl(boost::multi_array<float,1> & vec,boost::multi_array<float,2> & batchVec,uint32_t vSize,uint32_t nBatch,boost::multi_array<float,2> & expectedOutput)107 void LstmUtilsVectorBatchVectorCwiseProductTestImpl(
108         boost::multi_array<float, 1>& vec,
109         boost::multi_array<float, 2>& batchVec,
110         uint32_t vSize,
111         uint32_t nBatch,
112         boost::multi_array<float, 2>& expectedOutput)
113 {
114     float qScale = 0.0f;
115     int32_t qOffset = 0;
116     armnn::TensorInfo tensorInfo({nBatch, vSize}, ArmnnType,  qScale, qOffset );
117 
118     // Make encoder and decoder
119     std::unique_ptr<armnn::Decoder<float>> vecDecoder = armnn::MakeDecoder<float>(tensorInfo, vec.data());
120     std::unique_ptr<armnn::Decoder<float>> batchVecDecoder = armnn::MakeDecoder<float>(tensorInfo, batchVec.data());
121     std::unique_ptr<armnn::Encoder<float>> batchVecEncoder = armnn::MakeEncoder<float>(tensorInfo, batchVec.data());
122 
123     VectorBatchVectorCwiseProduct(*vecDecoder, vSize, *batchVecDecoder, nBatch, *batchVecEncoder);
124 
125     // check shape and compare values
126     BOOST_TEST(CompareTensors(batchVec, expectedOutput));
127 
128     // check if iterator is back at start position
129     batchVecEncoder->Set(1.0f);
130     BOOST_TEST(batchVec[0][0] == 1.0f);
131 }
132 
133 // Lstm Layer tests:
134 // *********************************** //
135 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
136 LayerTestResult<T, 2>
LstmNoCifgNoPeepholeNoProjectionTestImpl(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,const boost::multi_array<T,2> & input,const boost::multi_array<T,2> & outputExpected,float qScale=0.0f,int32_t qOffset=0,armnn::DataType constantDataType=armnn::DataType::Float32)137 LstmNoCifgNoPeepholeNoProjectionTestImpl(
138         armnn::IWorkloadFactory& workloadFactory,
139         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
140         const armnn::ITensorHandleFactory& tensorHandleFactory,
141         const boost::multi_array<T, 2>& input,
142         const boost::multi_array<T, 2>& outputExpected,
143         float qScale = 0.0f,
144         int32_t qOffset = 0,
145         armnn::DataType constantDataType = armnn::DataType::Float32)
146 {
147     IgnoreUnused(memoryManager);
148     unsigned int batchSize = armnn::numeric_cast<unsigned int>(input.shape()[0]);
149     unsigned int inputSize = armnn::numeric_cast<unsigned int>(input.shape()[1]);
150     unsigned int outputSize = armnn::numeric_cast<unsigned int>(outputExpected.shape()[1]);
151     // cellSize and outputSize have the same size when there is no projection.
152     unsigned numUnits = outputSize;
153 
154     armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, ArmnnType,  qScale, qOffset );
155     armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, ArmnnType, qScale, qOffset);
156     armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, ArmnnType, qScale, qOffset);
157 
158     armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 4}, ArmnnType, qScale, qOffset);
159     armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, ArmnnType, qScale, qOffset);
160     armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
161     armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
162 
163     LayerTestResult<T, 2> ret(outputTensorInfo);
164 
165     std::vector<T> inputVector;
166     inputVector.assign(input.data(), input.data() + (batchSize * inputSize));
167     auto inputTensor = MakeTensor<T,2>(inputTensorInfo, inputVector);
168 
169     std::vector<T> cellStateInVector(batchSize * numUnits, T());
170     auto cellStateInTensor = MakeTensor<T,2>(cellStateInTensorInfo, cellStateInVector);
171 
172     std::vector<T> outputStateInVector(batchSize * outputSize, T());
173     auto outputStateInTensor = MakeTensor<T,2>(outputStateInTensorInfo, outputStateInVector);
174 
175     std::vector<T> scratchBufferVector(batchSize * numUnits * 4, T());
176     auto scratchBufferTensor = MakeTensor<T,2>(scratchBufferTensorInfo, scratchBufferVector);
177 
178     std::vector<T> outputStateOutVector(batchSize * outputSize, T());
179     auto outputStateOutTensor = MakeTensor<T,2>(outputStateOutTensorInfo, outputStateOutVector);
180 
181     std::vector<T> cellStateOutVector(batchSize * numUnits, T());
182     auto cellStateOutTensor = MakeTensor<T,2>(cellStateOutTensorInfo, cellStateOutVector);
183 
184     std::vector<T> outputVector;
185     outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize));
186     ret.outputExpected = MakeTensor<T, 2>(outputTensorInfo, outputVector);
187 
188     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
189     std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
190             tensorHandleFactory.CreateTensorHandle(cellStateInTensorInfo);
191     std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
192             tensorHandleFactory.CreateTensorHandle(outputStateInTensorInfo);
193 
194     std::unique_ptr<armnn::ITensorHandle> scratchHandle =
195             tensorHandleFactory.CreateTensorHandle(scratchBufferTensorInfo);
196     std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle =
197             tensorHandleFactory.CreateTensorHandle(outputStateOutTensorInfo);
198     std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
199             tensorHandleFactory.CreateTensorHandle(cellStateOutTensorInfo);
200     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
201 
202     armnn::LstmQueueDescriptor data;
203     armnn::WorkloadInfo info;
204 
205     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
206     AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get());
207     AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get());
208 
209     AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get());
210     AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get());
211     AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get());
212     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
213 
214     armnn::TensorInfo tensorInfo4({numUnits}, constantDataType , qScale, qOffset);
215     armnn::TensorInfo tensorInfo8({numUnits, 2}, constantDataType, qScale, qOffset);
216     armnn::TensorInfo tensorInfo16({numUnits, 4}, constantDataType, qScale, qOffset);
217 
218     auto inputToInputWeights = MakeTensor<float, 2>(tensorInfo8, {-0.45018822f, -0.02338299f, -0.0870589f,
219                                                                   -0.34550029f, 0.04266912f, -0.15680569f,
220                                                                   -0.34856534f, 0.43890524f});
221 
222     auto inputToForgetWeights = MakeTensor<float, 2>(tensorInfo8, {0.09701663f, 0.20334584f, -0.50592935f,
223                                                                    -0.31343272f, -0.40032279f, 0.44781327f,
224                                                                    0.01387155f, -0.35593212f});
225 
226     auto inputToCellWeights = MakeTensor<float, 2>(tensorInfo8, {-0.50013041f, 0.1370284f, 0.11810488f, 0.2013163f,
227                                                                  -0.20583314f, 0.44344562f, 0.22077113f,
228                                                                  -0.29909778f});
229 
230     auto inputToOutputWeights = MakeTensor<float, 2>(tensorInfo8, {-0.25065863f, -0.28290087f, 0.04613829f,
231                                                                    0.40525138f, 0.44272184f, 0.03897077f,
232                                                                    -0.1556896f, 0.19487578f});
233 
234     auto recurrentToInputWeights = MakeTensor<float, 2>(tensorInfo16, {-0.0063535f, -0.2042388f, 0.31454784f,
235                                                                        -0.35746509f, 0.28902304f, 0.08183324f,
236                                                                        -0.16555229f, 0.02286911f, -0.13566875f,
237                                                                        0.03034258f, 0.48091322f, -0.12528998f,
238                                                                        0.24077177f, -0.51332325f, -0.33502164f,
239                                                                        0.10629296f});
240 
241     auto recurrentToForgetWeights = MakeTensor<float, 2>(tensorInfo16, {-0.48684245f, -0.06655136f, 0.42224967f,
242                                                                         0.2112639f, 0.27654213f, 0.20864892f,
243                                                                         -0.07646349f, 0.45877004f, 0.00141793f,
244                                                                         -0.14609534f, 0.36447752f, 0.09196436f,
245                                                                         0.28053468f, 0.01560611f, -0.20127171f,
246                                                                         -0.01140004f});
247 
248     auto recurrentToCellWeights = MakeTensor<float, 2>(tensorInfo16, {-0.3407414f, 0.24443203f, -0.2078532f,
249                                                                       0.26320225f, 0.05695659f, -0.00123841f,
250                                                                       -0.4744786f, -0.35869038f, -0.06418842f,
251                                                                       -0.13502428f, -0.501764f, 0.22830659f,
252                                                                       -0.46367589f, 0.26016325f, -0.03894562f,
253                                                                       -0.16368064f});
254 
255     auto recurrentToOutputWeights = MakeTensor<float, 2>(tensorInfo16, {0.43385774f, -0.17194885f, 0.2718237f,
256                                                                         0.09215671f, 0.24107647f, -0.39835793f,
257                                                                         0.18212086f, 0.01301402f, 0.48572797f,
258                                                                         -0.50656658f, 0.20047462f, -0.20607421f,
259                                                                         -0.51818722f, -0.15390486f, 0.0468148f,
260                                                                         0.39922136f});
261 
262     auto cellToInputWeights = MakeTensor<float, 1>(tensorInfo4, {0., 0., 0., 0.});
263 
264     auto inputGateBias = MakeTensor<float, 1>(tensorInfo4, {0., 0., 0., 0.});
265 
266     auto forgetGateBias = MakeTensor<float, 1>(tensorInfo4, {1., 1., 1., 1.});
267 
268     auto cellBias = MakeTensor<float, 1>(tensorInfo4, {0., 0., 0., 0.});
269 
270     auto outputGateBias = MakeTensor<float, 1>(tensorInfo4, {0., 0., 0., 0.});
271 
272     armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo8);
273     armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo8);
274     armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo8);
275     armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo8);
276     armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo16);
277     armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo16);
278     armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo16);
279     armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo16);
280     armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4);
281     armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4);
282     armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4);
283     armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4);
284     armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4);
285 
286     AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]);
287     AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
288     AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
289     AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
290     AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]);
291     AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
292     AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
293     AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
294     AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]);
295     AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]);
296     AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
297     AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
298     AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
299 
300     data.m_InputToInputWeights = &inputToInputWeightsTensor;
301     data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
302     data.m_InputToCellWeights = &inputToCellWeightsTensor;
303     data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
304     data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
305     data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
306     data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
307     data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
308     data.m_InputGateBias = &inputGateBiasTensor;
309     data.m_ForgetGateBias = &forgetGateBiasTensor;
310     data.m_CellBias = &cellBiasTensor;
311     data.m_OutputGateBias = &outputGateBiasTensor;
312 
313     // Flags to set test configuration
314     data.m_Parameters.m_ActivationFunc = 4;
315     data.m_Parameters.m_CifgEnabled = false;
316     data.m_Parameters.m_PeepholeEnabled = false;
317     data.m_Parameters.m_ProjectionEnabled = false;
318 
319     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateLstm(data, info);
320     inputHandle->Allocate();
321     outputStateInHandle->Allocate();
322     cellStateInHandle->Allocate();
323 
324     scratchHandle->Allocate();
325     outputStateOutHandle->Allocate();
326     cellStateOutHandle->Allocate();
327     outputHandle->Allocate();
328 
329     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
330     CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
331     CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
332 
333     workload->Execute();
334 
335     CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
336 
337     return ret;
338 }
339 
340 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
341 LayerTestResult<T, 2>
LstmLayerNoCifgWithPeepholeWithProjectionTestImpl(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,const boost::multi_array<T,2> & input,const boost::multi_array<T,2> & outputExpected,float qScale=0.0f,int32_t qOffset=0,armnn::DataType constantDataType=armnn::DataType::Float32)342 LstmLayerNoCifgWithPeepholeWithProjectionTestImpl(armnn::IWorkloadFactory& workloadFactory,
343                                                   const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
344                                                   const armnn::ITensorHandleFactory& tensorHandleFactory,
345                                                   const boost::multi_array<T, 2>& input,
346                                                   const boost::multi_array<T, 2>& outputExpected,
347                                                   float qScale = 0.0f,
348                                                   int32_t qOffset = 0,
349                                                   armnn::DataType constantDataType = armnn::DataType::Float32)
350 {
351     IgnoreUnused(memoryManager);
352     unsigned int batchSize = 2;
353     unsigned int outputSize = 16;
354     unsigned int inputSize = 5;
355     unsigned numUnits = 20;
356 
357     armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, ArmnnType, qScale, qOffset);
358     armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, ArmnnType, qScale, qOffset);
359     armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, ArmnnType, qScale, qOffset);
360 
361     // Scratch buffer size without CIFG [batchSize, numUnits * 4]
362     armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 4}, ArmnnType, qScale, qOffset);
363     armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, ArmnnType, qScale, qOffset);
364     armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
365     armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
366 
367     LayerTestResult<T, 2> ret(outputTensorInfo);
368 
369     std::vector<T> inputVector;
370     inputVector.assign(input.data(), input.data() + (batchSize * inputSize));
371     auto inputTensor = MakeTensor<T,2>(inputTensorInfo, inputVector);
372 
373     std::vector<T> cellStateInVector(batchSize * numUnits, T());
374     auto cellStateInTensor = MakeTensor<T,2>(cellStateInTensorInfo, cellStateInVector);
375 
376     std::vector<T> outputStateInVector(batchSize * outputSize, T());
377     auto outputStateInTensor = MakeTensor<T,2>(outputStateInTensorInfo, outputStateInVector);
378 
379     std::vector<T> scratchBufferVector(batchSize * numUnits * 4, T());
380     auto scratchBufferTensor = MakeTensor<T,2>(scratchBufferTensorInfo, scratchBufferVector);
381 
382     std::vector<T> outputStateOutVector(batchSize * outputSize, T());
383     auto outputStateOutTensor = MakeTensor<T,2>(outputStateOutTensorInfo, outputStateOutVector);
384 
385     std::vector<T> cellStateOutVector(batchSize * numUnits, T());
386     auto cellStateOutTensor = MakeTensor<T,2>(cellStateOutTensorInfo, cellStateOutVector);
387 
388     std::vector<T> outputVector;
389     outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize));
390     ret.outputExpected = MakeTensor<T, 2>(outputTensorInfo, outputVector);
391 
392     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
393     std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
394             tensorHandleFactory.CreateTensorHandle(cellStateInTensorInfo);
395     std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
396             tensorHandleFactory.CreateTensorHandle(outputStateInTensorInfo);
397 
398     std::unique_ptr<armnn::ITensorHandle> scratchHandle =
399             tensorHandleFactory.CreateTensorHandle(scratchBufferTensorInfo);
400     std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle =
401             tensorHandleFactory.CreateTensorHandle(outputStateOutTensorInfo);
402     std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
403             tensorHandleFactory.CreateTensorHandle(cellStateOutTensorInfo);
404     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
405 
406     armnn::LstmQueueDescriptor data;
407     armnn::WorkloadInfo info;
408 
409     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
410     AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get());
411     AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get());
412 
413     AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get());
414     AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get());
415     AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get());
416     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
417 
418     armnn::TensorInfo tensorInfo16({outputSize}, constantDataType, qScale, qOffset);
419     armnn::TensorInfo tensorInfo20({numUnits}, constantDataType, qScale, qOffset);
420     armnn::TensorInfo tensorInfo20x5({numUnits, inputSize}, constantDataType, qScale, qOffset);
421     armnn::TensorInfo tensorInfo20x16({numUnits, outputSize}, constantDataType, qScale, qOffset);
422     armnn::TensorInfo tensorInfo16x20({outputSize, numUnits}, constantDataType, qScale, qOffset);
423 
424     auto inputToInputWeights =
425             MakeTensor<float, 2>(tensorInfo20x5, {0.021393683f,0.06124551f,  0.046905167f,-0.014657677f,-0.03149463f,
426                                                   0.09171803f, 0.14647801f,0.10797193f,   -0.0057968358f,0.0019193048f,
427                                                   -0.2726754f, 0.10154029f, -0.018539885f, 0.080349885f, -0.10262385f,
428                                                   -0.022599787f,-0.09121155f, -0.008675967f, -0.045206103f,-0.0821282f,
429                                                   -0.008045952f,0.015478081f, 0.055217247f,  0.038719587f, 0.044153627f,
430                                                   -0.06453243f,0.05031825f, -0.046935108f, -0.008164439f, 0.014574226f,
431                                                   -0.1671009f,   -0.15519552f, -0.16819797f,-0.13971269f,-0.11953059f,
432                                                   0.25005487f, -0.22790983f, 0.009855087f,  -0.028140958f, -0.11200698f,
433                                                   0.11295408f, -0.0035217577f, 0.054485075f,  0.05184695f, 0.064711206f,
434                                                   0.10989193f,   0.11674786f,  0.03490607f, 0.07727357f, 0.11390585f,
435                                                   -0.1863375f,  -0.1034451f, -0.13945189f, -0.049401227f, -0.18767063f,
436                                                   0.042483903f, 0.14233552f, 0.13832581f, 0.18350165f,    0.14545603f,
437                                                   -0.028545704f,0.024939531f,0.050929718f,0.0076203286f,-0.0029723682f,
438                                                   -0.042484224f, -0.11827596f, -0.09171104f,  -0.10808628f,-0.16327988f,
439                                                   -0.2273378f,   -0.0993647f, -0.017155107f,0.0023917493f,0.049272764f,
440                                                   0.0038534778f, 0.054764505f,   0.089753784f, 0.06947234f, 0.08014476f,
441                                                   -0.04544234f, -0.0497073f,-0.07135631f,  -0.048929106f,-0.004042012f,
442                                                   -0.009284026f, 0.018042054f, 0.0036860977f,-0.07427302f, -0.11434604f,
443                                                   -0.018995456f, 0.031487543f, 0.012834908f,0.019977754f,0.044256654f,
444                                                   -0.39292613f,  -0.18519334f, -0.11651281f,-0.06809892f, 0.011373677f
445             });
446 
447     auto inputToForgetWeights =
448             MakeTensor<float, 2>(tensorInfo20x5, {-0.0018401089f, -0.004852237f,0.03698424f, 0.014181704f,0.028273236f,
449                                                    -0.016726194f, -0.05249759f,-0.10204261f, 0.00861066f,-0.040979505f,
450                                                    -0.009899187f,0.01923892f,-0.028177269f, -0.08535103f,-0.14585495f,
451                                                    0.10662567f,-0.01909731f,-0.017883534f,-0.0047269356f,-0.045103323f,
452                                                    0.0030784295f,0.076784775f,0.07463696f, 0.094531395f,0.0814421f,
453                                                    -0.12257899f, -0.033945758f,-0.031303465f, 0.045630626f,0.06843887f,
454                                                    -0.13492945f, -0.012480007f,-0.0811829f, -0.07224499f,-0.09628791f,
455                                                    0.045100946f,0.0012300825f, 0.013964662f, 0.099372394f,0.02543059f,
456                                                    0.06958324f,    0.034257296f, 0.0482646f, 0.06267997f,0.052625068f,
457                                                    0.12784666f,    0.07077897f,  0.025725935f, 0.04165009f,0.07241905f,
458                                                    0.018668644f, -0.037377294f,-0.06277783f,-0.08833636f,-0.040120605f,
459                                                    -0.011405586f,-0.007808335f,-0.010301386f,-0.005102167f,0.027717464f,
460                                                    0.05483423f, 0.11449111f, 0.11289652f,0.10939839f, 0.13396506f,
461                                                    -0.08402166f,-0.01901462f,  -0.044678304f,-0.07720565f,0.014350063f,
462                                                    -0.11757958f, -0.0652038f, -0.08185733f,-0.076754324f,-0.092614375f,
463                                                    0.10405491f, 0.052960336f, 0.035755895f,0.035839386f,-0.012540553f,
464                                                    0.036881298f,   0.02913376f,  0.03420159f,0.05448447f,-0.054523353f,
465                                                    0.02582715f, 0.02327355f, -0.011857179f,-0.0011980024f,-0.034641717f,
466                                                    -0.026125094f,-0.17582615f,-0.15923657f,-0.27486774f,-0.0006143371f,
467                                                    0.0001771948f,  -8.470171e-05f, 0.02651807f,0.045790765f,0.06956496f
468             });
469 
470     auto inputToCellWeights =
471             MakeTensor<float, 2>(tensorInfo20x5, {-0.04580283f,   -0.09549462f,   -0.032418985f,  -0.06454633f,
472                                                   -0.043528453f,  0.043018587f,   -0.049152344f,  -0.12418144f,
473                                                   -0.078985475f,  -0.07596889f,   0.019484362f,   -0.11434962f,
474                                                   -0.0074034138f, -0.06314844f,   -0.092981495f,  0.0062155537f,
475                                                   -0.025034338f,  -0.0028890965f, 0.048929527f,   0.06235075f,
476                                                   0.10665918f,    -0.032036792f,  -0.08505916f,   -0.10843358f,
477                                                   -0.13002433f,   -0.036816437f,  -0.02130134f,   -0.016518239f,
478                                                   0.0047691227f,  -0.0025825808f, 0.066017866f,   0.029991534f,
479                                                   -0.10652836f,   -0.1037554f,    -0.13056071f,   -0.03266643f,
480                                                   -0.033702414f,  -0.006473424f,  -0.04611692f,   0.014419339f,
481                                                   -0.025174323f,  0.0396852f,     0.081777506f,   0.06157468f,
482                                                   0.10210095f,    -0.009658194f,  0.046511717f,   0.03603906f,
483                                                   0.0069369148f,  0.015960095f,   -0.06507666f,   0.09551598f,
484                                                   0.053568836f,   0.06408714f,    0.12835667f,    -0.008714329f,
485                                                   -0.20211966f,   -0.12093674f,   0.029450472f,   0.2849013f,
486                                                   -0.029227901f,  0.1164364f,     -0.08560263f,   0.09941786f,
487                                                   -0.036999565f,  -0.028842626f,  -0.0033637602f, -0.017012902f,
488                                                   -0.09720865f,   -0.11193351f,   -0.029155117f,  -0.017936034f,
489                                                   -0.009768936f,  -0.04223324f,   -0.036159635f,  0.06505112f,
490                                                   -0.021742892f,  -0.023377212f,  -0.07221364f,   -0.06430552f,
491                                                   0.05453865f,    0.091149814f,   0.06387331f,    0.007518393f,
492                                                   0.055960953f,   0.069779344f,   0.046411168f,   0.10509911f,
493                                                   0.07463894f,    0.0075130584f,  0.012850982f,   0.04555431f,
494                                                   0.056955688f,   0.06555285f,    0.050801456f,   -0.009862683f,
495                                                   0.00826772f,    -0.026555609f,  -0.0073611983f, -0.0014897042f
496             });
497 
498     auto inputToOutputWeights =
499             MakeTensor<float, 2>(tensorInfo20x5, {-0.0998932f,   -0.07201956f, -0.052803773f,-0.15629593f,-0.15001918f,
500                                                   -0.07650751f,0.02359855f, -0.075155355f, -0.08037709f,  -0.15093534f,
501                                                   0.029517552f, -0.04751393f, 0.010350531f,-0.02664851f, -0.016839722f,
502                                                   -0.023121163f, 0.0077019283f, 0.012851257f, -0.05040649f,-0.0129761f,
503                                                   -0.021737747f,-0.038305793f,-0.06870586f, -0.01481247f,-0.001285394f,
504                                                   0.10124236f,  0.083122835f, 0.053313006f,-0.062235646f,-0.075637154f,
505                                                   -0.027833903f, 0.029774971f,  0.1130802f, 0.09218906f, 0.09506135f,
506                                                   -0.086665764f,-0.037162706f,-0.038880914f,-0.035832845f,-0.014481564f,
507                                                   -0.09825003f,-0.12048569f,-0.097665586f,-0.05287633f, -0.0964047f,
508                                                   -0.11366429f,  0.035777505f,  0.13568819f, 0.052451383f,0.050649304f,
509                                                   0.05798951f, -0.021852335f,-0.099848844f,0.014740475f,-0.078897946f,
510                                                   0.04974699f, 0.014160473f,  0.06973932f,    0.04964942f, 0.033364646f,
511                                                   0.08190124f,   0.025535367f, 0.050893165f, 0.048514254f,0.06945813f,
512                                                   -0.078907564f,-0.06707616f,  -0.11844508f, -0.09986688f,-0.07509403f,
513                                                   0.06263226f,   0.14925587f,   0.20188436f, 0.12098451f,0.14639415f,
514                                                   0.0015017595f, -0.014267382f, -0.03417257f,0.012711468f,0.0028300495f,
515                                                   -0.024758482f, -0.05098548f,-0.0821182f, 0.014225672f,  0.021544158f,
516                                                   0.08949725f,  0.07505268f, -0.0020780868f, 0.04908258f,0.06476295f,
517                                                   -0.022907063f,0.027562456f,0.040185735f, 0.019567577f,-0.015598739f,
518                                                   -0.049097303f, -0.017121866f, -0.083368234f,-0.02332002f,-0.0840956f
519             });
520 
521     auto inputGateBias =
522             MakeTensor<float, 1>(tensorInfo20, {0.02234832f,  0.14757581f,   0.18176508f,  0.10380666f,  0.053110216f,
523                                                 -0.06928846f, -0.13942584f,  -0.11816189f, 0.19483899f,  0.03652339f,
524                                                 -0.10250295f, 0.036714908f,  -0.18426876f, 0.036065217f, 0.21810818f,
525                                                 0.02383196f,  -0.043370757f, 0.08690144f,  -0.04444982f, 0.00030581196f
526             });
527 
528     auto forgetGateBias =
529             MakeTensor<float, 1>(tensorInfo20, {0.035185695f, -0.042891346f, -0.03032477f, 0.23027696f,
530                                                 0.11098921f,  0.15378423f,   0.09263801f,  0.09790885f,
531                                                 0.09508917f,  0.061199076f,  0.07665568f,  -0.015443159f,
532                                                 -0.03499149f, 0.046190713f,  0.08895977f,  0.10899629f,
533                                                 0.40694186f,  0.06030037f,   0.012413437f, -0.06108739f
534             });
535 
536     auto cellBias =
537             MakeTensor<float, 1>(tensorInfo20, {-0.024379363f, 0.0055531194f, 0.23377132f,   0.033463873f,
538                                                 -0.1483596f,   -0.10639995f,  -0.091433935f, 0.058573797f,
539                                                 -0.06809782f,  -0.07889636f,  -0.043246906f, -0.09829136f,
540                                                 -0.4279842f,   0.034901652f,  0.18797937f,   0.0075234566f,
541                                                 0.016178843f,  0.1749513f,    0.13975595f,   0.92058027f
542             });
543 
544     auto outputGateBias =
545             MakeTensor<float, 1>(tensorInfo20, {0.046159424f,  -0.0012809046f, 0.03563469f, 0.12648113f, 0.027195795f,
546                                                 0.35373217f,   -0.018957434f,  0.008907322f, -0.0762701f, 0.12018895f,
547                                                 0.04216877f,   0.0022856654f,  0.040952638f,  0.3147856f,  0.08225149f,
548                                                 -0.057416286f, -0.14995944f,   -0.008040261f, 0.13208859f, 0.029760877f
549             });
550 
551     auto recurrentToInputWeights =
552             MakeTensor<float, 2>(tensorInfo20x16, {-0.001374326f,   -0.078856036f,   0.10672688f,    0.029162422f,
553                                                    -0.11585556f,    0.02557986f,     -0.13446963f,   -0.035785314f,
554                                                    -0.01244275f,    0.025961924f,    -0.02337298f,   -0.044228926f,
555                                                    -0.055839065f,   -0.046598054f,   -0.010546039f,  -0.06900766f,
556                                                    0.027239809f,    0.022582639f,    -0.013296484f,  -0.05459212f,
557                                                    0.08981f,        -0.045407712f,   0.08682226f,    -0.06867011f,
558                                                    -0.14390695f,    -0.02916037f,    0.000996957f,   0.091420636f,
559                                                    0.14283475f,     -0.07390571f,    -0.06402044f,   0.062524505f,
560                                                    -0.093129106f,   0.04860203f,     -0.08364217f,   -0.08119002f,
561                                                    0.009352075f,    0.22920375f,     0.0016303885f,  0.11583097f,
562                                                    -0.13732095f,    0.012405723f,    -0.07551853f,   0.06343048f,
563                                                    0.12162708f,     -0.031923793f,   -0.014335606f,  0.01790974f,
564                                                    -0.10650317f,    -0.0724401f,     0.08554849f,    -0.05727212f,
565                                                    0.06556731f,     -0.042729504f,   -0.043227166f,  0.011683251f,
566                                                    -0.013082158f,   -0.029302018f,   -0.010899579f,  -0.062036745f,
567                                                    -0.022509435f,   -0.00964907f,    -0.01567329f,   0.04260106f,
568                                                    -0.07787477f,    -0.11576462f,    0.017356863f,   0.048673786f,
569                                                    -0.017577527f,   -0.05527947f,    -0.082487635f,  -0.040137455f,
570                                                    -0.10820036f,    -0.04666372f,    0.022746278f,   -0.07851417f,
571                                                    0.01068115f,     0.032956902f,    0.022433773f,   0.0026891115f,
572                                                    0.08944216f,     -0.0685835f,     0.010513544f,   0.07228705f,
573                                                    0.02032331f,     -0.059686817f,   -0.0005566496f, -0.086984694f,
574                                                    0.040414046f,    -0.1380399f,     0.094208956f,   -0.05722982f,
575                                                    0.012092817f,    -0.04989123f,    -0.086576f,     -0.003399834f,
576                                                    -0.04696032f,    -0.045747425f,   0.10091314f,    0.048676282f,
577                                                    -0.029037097f,   0.031399418f,    -0.0040285117f, 0.047237843f,
578                                                    0.09504992f,     0.041799378f,    -0.049185462f,  -0.031518843f,
579                                                    -0.10516937f,    0.026374253f,    0.10058866f,    -0.0033195973f,
580                                                    -0.041975245f,   0.0073591834f,   0.0033782164f,  -0.004325073f,
581                                                    -0.10167381f,    0.042500053f,    -0.01447153f,   0.06464186f,
582                                                    -0.017142897f,   0.03312627f,     0.009205989f,   0.024138335f,
583                                                    -0.011337001f,   0.035530265f,    -0.010912711f,  0.0706555f,
584                                                    -0.005894094f,   0.051841937f,    -0.1401738f,    -0.02351249f,
585                                                    0.0365468f,      0.07590991f,     0.08838724f,    0.021681072f,
586                                                    -0.10086113f,    0.019608743f,    -0.06195883f,   0.077335775f,
587                                                    0.023646897f,    -0.095322326f,   0.02233014f,    0.09756986f,
588                                                    -0.048691444f,   -0.009579111f,   0.07595467f,    0.11480546f,
589                                                    -0.09801813f,    0.019894179f,    0.08502348f,    0.004032281f,
590                                                    0.037211012f,    0.068537936f,    -0.048005626f,  -0.091520436f,
591                                                    -0.028379958f,   -0.01556313f,    0.06554592f,    -0.045599163f,
592                                                    -0.01672207f,    -0.020169014f,   -0.011877351f,  -0.20212261f,
593                                                    0.010889619f,    0.0047078193f,   0.038385306f,   0.08540671f,
594                                                    -0.017140968f,   -0.0035865551f,  0.016678626f,   0.005633034f,
595                                                    0.015963363f,    0.00871737f,     0.060130805f,   0.028611384f,
596                                                    0.10109069f,     -0.015060172f,   -0.07894427f,   0.06401885f,
597                                                    0.011584063f,    -0.024466386f,   0.0047652307f,  -0.09041358f,
598                                                    0.030737216f,    -0.0046374933f,  0.14215417f,    -0.11823516f,
599                                                    0.019899689f,    0.006106124f,    -0.027092824f,  0.0786356f,
600                                                    0.05052217f,     -0.058925f,      -0.011402121f,  -0.024987547f,
601                                                    -0.0013661642f,  -0.06832946f,    -0.015667673f,  -0.1083353f,
602                                                    -0.00096863037f, -0.06988685f,    -0.053350925f,  -0.027275559f,
603                                                    -0.033664223f,   -0.07978348f,    -0.025200296f,  -0.017207067f,
604                                                    -0.058403496f,   -0.055697463f,   0.005798788f,   0.12965427f,
605                                                    -0.062582195f,   0.0013350133f,   -0.10482091f,   0.0379771f,
606                                                    0.072521195f,    -0.0029455067f,  -0.13797039f,   -0.03628521f,
607                                                    0.013806405f,    -0.017858358f,   -0.01008298f,   -0.07700066f,
608                                                    -0.017081132f,   0.019358726f,    0.0027079724f,  0.004635139f,
609                                                    0.062634714f,    -0.02338735f,    -0.039547626f,  -0.02050681f,
610                                                    0.03385117f,     -0.083611414f,   0.002862572f,   -0.09421313f,
611                                                    0.058618143f,    -0.08598433f,    0.00972939f,    0.023867095f,
612                                                    -0.053934585f,   -0.023203006f,   0.07452513f,    -0.048767887f,
613                                                    -0.07314807f,    -0.056307215f,   -0.10433547f,   -0.06440842f,
614                                                    0.04328182f,     0.04389765f,     -0.020006588f,  -0.09076438f,
615                                                    -0.11652589f,    -0.021705797f,   0.03345259f,    -0.010329105f,
616                                                    -0.025767034f,   0.013057034f,    -0.07316461f,   -0.10145612f,
617                                                    0.06358255f,     0.18531723f,     0.07759293f,    0.12006465f,
618                                                    0.1305557f,      0.058638252f,    -0.03393652f,   0.09622831f,
619                                                    -0.16253184f,    -2.4580743e-06f, 0.079869635f,   -0.070196845f,
620                                                    -0.005644518f,   0.06857898f,     -0.12598175f,   -0.035084512f,
621                                                    0.03156317f,     -0.12794146f,    -0.031963028f,  0.04692781f,
622                                                    0.030070418f,    0.0071660685f,   -0.095516115f,  -0.004643372f,
623                                                    0.040170413f,    -0.062104587f,   -0.0037324072f, 0.0554317f,
624                                                    0.08184801f,     -0.019164372f,   0.06791302f,    0.034257166f,
625                                                    -0.10307039f,    0.021943003f,    0.046745934f,   0.0790918f,
626                                                    -0.0265588f,     -0.007824208f,   0.042546265f,   -0.00977924f,
627                                                    -0.0002440307f,  -0.017384544f,   -0.017990116f,  0.12252321f,
628                                                    -0.014512694f,   -0.08251313f,    0.08861942f,    0.13589665f,
629                                                    0.026351685f,    0.012641483f,    0.07466548f,    0.044301085f,
630                                                    -0.045414884f,   -0.051112458f,   0.03444247f,    -0.08502782f,
631                                                    -0.04106223f,    -0.028126027f,   0.028473156f,   0.10467447f
632             });
633 
634     auto recurrentToForgetWeights =
635             MakeTensor<float, 2>(tensorInfo20x16, {-0.057784554f,  -0.026057621f,  -0.068447545f,   -0.022581743f,
636                                                    0.14811787f,    0.10826372f,    0.09471067f,     0.03987225f,
637                                                    -0.0039523416f, 0.00030638507f, 0.053185795f,    0.10572994f,
638                                                    0.08414449f,    -0.022036452f,  -0.00066928595f, -0.09203576f,
639                                                    0.032950465f,   -0.10985798f,   -0.023809856f,   0.0021431844f,
640                                                    -0.02196096f,   -0.00326074f,   0.00058621005f,  -0.074678116f,
641                                                    -0.06193199f,   0.055729095f,   0.03736828f,     0.020123724f,
642                                                    0.061878487f,   -0.04729229f,   0.034919553f,    -0.07585433f,
643                                                    -0.04421272f,   -0.044019096f,  0.085488975f,    0.04058006f,
644                                                    -0.06890133f,   -0.030951202f,  -0.024628663f,   -0.07672815f,
645                                                    0.034293607f,   0.08556707f,    -0.05293577f,    -0.033561368f,
646                                                    -0.04899627f,   0.0241671f,     0.015736353f,    -0.095442444f,
647                                                    -0.029564252f,  0.016493602f,   -0.035026584f,   0.022337519f,
648                                                    -0.026871363f,  0.004780428f,   0.0077918363f,   -0.03601621f,
649                                                    0.016435321f,   -0.03263031f,   -0.09543275f,    -0.047392778f,
650                                                    0.013454138f,   0.028934088f,   0.01685226f,     -0.086110644f,
651                                                    -0.046250615f,  -0.01847454f,   0.047608484f,    0.07339695f,
652                                                    0.034546845f,   -0.04881143f,   0.009128804f,    -0.08802852f,
653                                                    0.03761666f,    0.008096139f,   -0.014454086f,   0.014361001f,
654                                                    -0.023502491f,  -0.0011840804f, -0.07607001f,    0.001856849f,
655                                                    -0.06509276f,   -0.006021153f,  -0.08570962f,    -0.1451793f,
656                                                    0.060212336f,   0.055259194f,   0.06974018f,     0.049454916f,
657                                                    -0.027794661f,  -0.08077226f,   -0.016179763f,   0.1169753f,
658                                                    0.17213494f,    -0.0056326236f, -0.053934924f,   -0.0124349f,
659                                                    -0.11520337f,   0.05409887f,    0.088759385f,    0.0019655675f,
660                                                    0.0042065294f,  0.03881498f,    0.019844765f,    0.041858196f,
661                                                    -0.05695512f,   0.047233116f,   0.038937137f,    -0.06542224f,
662                                                    0.014429736f,   -0.09719407f,   0.13908425f,     -0.05379757f,
663                                                    0.012321099f,   0.082840554f,   -0.029899208f,   0.044217527f,
664                                                    0.059855383f,   0.07711018f,    -0.045319796f,   0.0948846f,
665                                                    -0.011724666f,  -0.0033288454f, -0.033542685f,   -0.04764985f,
666                                                    -0.13873616f,   0.040668588f,   0.034832682f,    -0.015319203f,
667                                                    -0.018715994f,  0.046002675f,   0.0599172f,      -0.043107376f,
668                                                    0.0294216f,     -0.002314414f,  -0.022424703f,   0.0030315618f,
669                                                    0.0014641669f,  0.0029166266f,  -0.11878115f,    0.013738511f,
670                                                    0.12375372f,    -0.0006038222f, 0.029104086f,    0.087442465f,
671                                                    0.052958444f,   0.07558703f,    0.04817258f,     0.044462286f,
672                                                    -0.015213451f,  -0.08783778f,   -0.0561384f,     -0.003008196f,
673                                                    0.047060397f,   -0.002058388f,  0.03429439f,     -0.018839769f,
674                                                    0.024734668f,   0.024614193f,   -0.042046934f,   0.09597743f,
675                                                    -0.0043254104f, 0.04320769f,    0.0064070094f,   -0.0019131786f,
676                                                    -0.02558259f,   -0.022822596f,  -0.023273505f,   -0.02464396f,
677                                                    -0.10991725f,   -0.006240552f,  0.0074488563f,   0.024044557f,
678                                                    0.04383914f,    -0.046476185f,  0.028658995f,    0.060410924f,
679                                                    0.050786525f,   0.009452605f,   -0.0073054377f,  -0.024810238f,
680                                                    0.0052906186f,  0.0066939713f,  -0.0020913032f,  0.014515517f,
681                                                    0.015898481f,   0.021362653f,   -0.030262267f,   0.016587038f,
682                                                    -0.011442813f,  0.041154444f,   -0.007631438f,   -0.03423484f,
683                                                    -0.010977775f,  0.036152758f,   0.0066366293f,   0.11915515f,
684                                                    0.02318443f,    -0.041350313f,  0.021485701f,    -0.10906167f,
685                                                    -0.028218046f,  -0.00954771f,   0.020531068f,    -0.11995105f,
686                                                    -0.03672871f,   0.024019798f,   0.014255957f,    -0.05221243f,
687                                                    -0.00661567f,   -0.04630967f,   0.033188973f,    0.10107534f,
688                                                    -0.014027541f,  0.030796422f,   -0.10270911f,    -0.035999842f,
689                                                    0.15443139f,    0.07684145f,    0.036571592f,    -0.035900835f,
690                                                    -0.0034699554f, 0.06209149f,    0.015920248f,    -0.031122351f,
691                                                    -0.03858649f,   0.01849943f,    0.13872518f,     0.01503974f,
692                                                    0.069941424f,   -0.06948533f,   -0.0088794185f,  0.061282158f,
693                                                    -0.047401894f,  0.03100163f,    -0.041533746f,   -0.10430945f,
694                                                    0.044574402f,   -0.01425562f,   -0.024290353f,   0.034563623f,
695                                                    0.05866852f,    0.023947537f,   -0.09445152f,    0.035450947f,
696                                                    0.02247216f,    -0.0042998926f, 0.061146557f,    -0.10250651f,
697                                                    0.020881841f,   -0.06747029f,   0.10062043f,     -0.0023941975f,
698                                                    0.03532124f,    -0.016341697f,  0.09685456f,     -0.016764693f,
699                                                    0.051808182f,   0.05875331f,    -0.04536488f,    0.001626336f,
700                                                    -0.028892258f,  -0.01048663f,   -0.009793449f,   -0.017093895f,
701                                                    0.010987891f,   0.02357273f,    -0.00010856845f, 0.0099760275f,
702                                                    -0.001845119f,  -0.03551521f,   0.0018358806f,   0.05763657f,
703                                                    -0.01769146f,   0.040995963f,   0.02235177f,     -0.060430344f,
704                                                    0.11475477f,    -0.023854522f,  0.10071741f,     0.0686208f,
705                                                    -0.014250481f,  0.034261297f,   0.047418304f,    0.08562733f,
706                                                    -0.030519066f,  0.0060542435f,  0.014653856f,    -0.038836084f,
707                                                    0.04096551f,    0.032249358f,   -0.08355519f,    -0.026823482f,
708                                                    0.056386515f,   -0.010401743f,  -0.028396193f,   0.08507674f,
709                                                    0.014410365f,   0.020995233f,   0.17040324f,     0.11511526f,
710                                                    0.02459721f,    0.0066619175f,  0.025853224f,    -0.023133837f,
711                                                    -0.081302024f,  0.017264642f,   -0.009585969f,   0.09491168f,
712                                                    -0.051313367f,  0.054532815f,   -0.014298593f,   0.10657464f,
713                                                    0.007076659f,   0.10964551f,    0.0409152f,      0.008275321f,
714                                                    -0.07283536f,   0.07937492f,    0.04192024f,     -0.1075027f
715             });
716 
717     auto recurrentToCellWeights =
718             MakeTensor<float, 2>(tensorInfo20x16, {-0.037322544f,   0.018592842f,   0.0056175636f,  -0.06253426f,
719                                                    0.055647098f,    -0.05713207f,   -0.05626563f,   0.005559383f,
720                                                    0.03375411f,     -0.025757805f,  -0.088049285f,  0.06017052f,
721                                                    -0.06570978f,    0.007384076f,   0.035123326f,   -0.07920549f,
722                                                    0.053676967f,    0.044480428f,   -0.07663568f,   0.0071805613f,
723                                                    0.08089997f,     0.05143358f,    0.038261272f,   0.03339287f,
724                                                    -0.027673481f,   0.044746667f,   0.028349208f,   0.020090483f,
725                                                    -0.019443132f,   -0.030755889f,  -0.0040000007f, 0.04465846f,
726                                                    -0.021585021f,   0.0031670958f,  0.0053199246f,  -0.056117613f,
727                                                    -0.10893326f,    0.076739706f,   -0.08509834f,   -0.027997585f,
728                                                    0.037871376f,    0.01449768f,    -0.09002357f,   -0.06111149f,
729                                                    -0.046195522f,   0.0422062f,     -0.005683705f,  -0.1253618f,
730                                                    -0.012925729f,   -0.04890792f,   0.06985068f,    0.037654128f,
731                                                    0.03398274f,     -0.004781977f,  0.007032333f,   -0.031787455f,
732                                                    0.010868644f,    -0.031489216f,  0.09525667f,    0.013939797f,
733                                                    0.0058680447f,   0.0167067f,     0.02668468f,    -0.04797466f,
734                                                    -0.048885044f,   -0.12722108f,   0.035304096f,   0.06554885f,
735                                                    0.00972396f,     -0.039238118f,  -0.05159735f,   -0.11329045f,
736                                                    0.1613692f,      -0.03750952f,   0.06529313f,    -0.071974665f,
737                                                    -0.11769596f,    0.015524369f,   -0.0013754242f, -0.12446318f,
738                                                    0.02786344f,     -0.014179351f,  0.005264273f,   0.14376344f,
739                                                    0.015983658f,    0.03406988f,    -0.06939408f,   0.040699873f,
740                                                    0.02111075f,     0.09669095f,    0.041345075f,   -0.08316494f,
741                                                    -0.07684199f,    -0.045768797f,  0.032298047f,   -0.041805092f,
742                                                    0.0119405f,      0.0061010392f,  0.12652606f,    0.0064572375f,
743                                                    -0.024950314f,   0.11574242f,    0.04508852f,    -0.04335324f,
744                                                    0.06760663f,     -0.027437469f,  0.07216407f,    0.06977076f,
745                                                    -0.05438599f,    0.034033038f,   -0.028602652f,  0.05346137f,
746                                                    0.043184172f,    -0.037189785f,  0.10420091f,    0.00882477f,
747                                                    -0.054019816f,   -0.074273005f,  -0.030617684f,  -0.0028467078f,
748                                                    0.024302477f,    -0.0038869337f, 0.005332455f,   0.0013399826f,
749                                                    0.04361412f,     -0.007001822f,  0.09631092f,    -0.06702025f,
750                                                    -0.042049985f,   -0.035070654f,  -0.04103342f,   -0.10273396f,
751                                                    0.0544271f,      0.037184782f,   -0.13150354f,   -0.0058036847f,
752                                                    -0.008264958f,   0.042035464f,   0.05891794f,    0.029673764f,
753                                                    0.0063542654f,   0.044788733f,   0.054816857f,   0.062257513f,
754                                                    -0.00093483756f, 0.048938446f,   -0.004952862f,  -0.007730018f,
755                                                    -0.04043371f,    -0.017094059f,  0.07229206f,    -0.023670016f,
756                                                    -0.052195564f,   -0.025616996f,  -0.01520939f,   0.045104615f,
757                                                    -0.007376126f,   0.003533447f,   0.006570588f,   0.056037236f,
758                                                    0.12436656f,     0.051817212f,   0.028532185f,   -0.08686856f,
759                                                    0.11868599f,     0.07663395f,    -0.07323171f,   0.03463402f,
760                                                    -0.050708205f,   -0.04458982f,   -0.11590894f,   0.021273347f,
761                                                    0.1251325f,      -0.15313013f,   -0.12224372f,   0.17228661f,
762                                                    0.023029093f,    0.086124025f,   0.006445803f,   -0.03496501f,
763                                                    0.028332196f,    0.04449512f,    -0.042436164f,  -0.026587414f,
764                                                    -0.006041347f,   -0.09292539f,   -0.05678812f,   0.03897832f,
765                                                    0.09465633f,     0.008115513f,   -0.02171956f,   0.08304309f,
766                                                    0.071401566f,    0.019622514f,   0.032163795f,   -0.004167056f,
767                                                    0.02295182f,     0.030739572f,   0.056506045f,   0.004612461f,
768                                                    0.06524936f,     0.059999723f,   0.046395954f,   -0.0045512207f,
769                                                    -0.1335546f,     -0.030136576f,  0.11584653f,    -0.014678886f,
770                                                    0.0020118146f,   -0.09688814f,   -0.0790206f,    0.039770417f,
771                                                    -0.0329582f,     0.07922767f,    0.029322514f,   0.026405897f,
772                                                    0.04207835f,     -0.07073373f,   0.063781224f,   0.0859677f,
773                                                    -0.10925287f,    -0.07011058f,   0.048005477f,   0.03438226f,
774                                                    -0.09606514f,    -0.006669445f,  -0.043381985f,  0.04240257f,
775                                                    -0.06955775f,    -0.06769346f,   0.043903265f,   -0.026784198f,
776                                                    -0.017840602f,   0.024307009f,   -0.040079936f,  -0.019946516f,
777                                                    0.045318738f,    -0.12233574f,   0.026170589f,   0.0074471775f,
778                                                    0.15978073f,     0.10185836f,    0.10298046f,    -0.015476589f,
779                                                    -0.039390966f,   -0.072174534f,  0.0739445f,     -0.1211869f,
780                                                    -0.0347889f,     -0.07943156f,   0.014809798f,   -0.12412325f,
781                                                    -0.0030663363f,  0.039695457f,   0.0647603f,     -0.08291318f,
782                                                    -0.018529687f,   -0.004423833f,  0.0037507233f,  0.084633216f,
783                                                    -0.01514876f,    -0.056505352f,  -0.012800942f,  -0.06994386f,
784                                                    0.012962922f,    -0.031234352f,  0.07029052f,    0.016418684f,
785                                                    0.03618972f,     0.055686004f,   -0.08663945f,   -0.017404709f,
786                                                    -0.054761406f,   0.029065743f,   0.052404847f,   0.020238016f,
787                                                    0.0048197987f,   -0.0214882f,    0.07078733f,    0.013016777f,
788                                                    0.06262858f,     0.009184685f,   0.020785125f,   -0.043904778f,
789                                                    -0.0270329f,     -0.03299152f,   -0.060088247f,  -0.015162964f,
790                                                    -0.001828936f,   0.12642565f,    -0.056757294f,  0.013586685f,
791                                                    0.09232601f,     -0.035886683f,  0.06000002f,    0.05229691f,
792                                                    -0.052580316f,   -0.082029596f,  -0.010794592f,  0.012947712f,
793                                                    -0.036429964f,   -0.085508935f,  -0.13127148f,   -0.017744139f,
794                                                    0.031502828f,    0.036232427f,   -0.031581745f,  0.023051167f,
795                                                    -0.05325106f,    -0.03421577f,   0.028793324f,   -0.034633752f,
796                                                    -0.009881397f,   -0.043551125f,  -0.018609839f,  0.0019097115f,
797                                                    -0.008799762f,   0.056595087f,   0.0022273948f,  0.055752404f
798             });
799 
800     auto recurrentToOutputWeights =
801             MakeTensor<float, 2>(tensorInfo20x16, {0.025825322f, -0.05813119f, 0.09495884f,-0.045984812f, -0.01255415f,
802                                                     -0.0026479573f,-0.08196161f,-0.054914974f,-0.0046604523f,
803                                                    -0.029587349f, -0.044576716f,  -0.07480124f,  -0.082868785f,
804                                                    0.023254942f,    0.027502948f, -0.0039728214f, -0.08683098f,
805                                                    -0.08116779f,  -0.014675607f,   -0.037924774f, -0.023314456f,
806                                                    -0.007401714f, -0.09255757f,  0.029460307f,    -0.08829125f,
807                                                     -0.005139627f,  -0.08989442f,  -0.0555066f,   0.13596267f,
808                                                    -0.025062224f, -0.048351806f,  -0.03850004f,  0.07266485f,
809                                                    -0.022414139f,   0.05940088f, 0.075114764f,   0.09597592f,
810                                                    -0.010211725f, -0.0049794707f,  -0.011523867f, -0.025980417f,
811                                                    0.072999895f,  0.11091378f,   -0.081685916f,   0.014416728f,
812                                                     0.043229222f,   0.034178585f,  -0.07530371f,  0.035837382f,
813                                                    -0.085607f, -0.007721233f,  -0.03287832f,  -0.043848954f,
814                                                    -0.06404588f,    -0.06632928f, -0.073643476f,  0.008214239f,
815                                                    -0.045984086f, 0.039764922f,    0.03474462f, 0.060612556f,
816                                                    -0.080590084f, 0.049127717f,  0.04151091f,     -0.030063879f,
817                                                     0.008801774f,   -0.023021035f, -0.019558564f, 0.05158114f,
818                                                    -0.010947698f, -0.011825728f,  0.0075720972f, 0.0699727f,
819                                                    -0.0039981045f,  0.069350146f, 0.08799282f,    0.016156472f,
820                                                    0.035502106f,  0.11695009f,     0.006217345f, 0.13392477f,
821                                                    -0.037875112f, 0.025745004f,  0.08940699f,     -0.00924166f,
822                                                     0.0046702605f,  -0.036598757f, -0.08811812f,  0.10522024f,
823                                                    -0.032441203f, 0.008176899f,   -0.04454919f,  0.07058152f,
824                                                    0.0067963637f,   0.039206743f, 0.03259838f,    0.03725492f,
825                                                    -0.09515802f,  0.013326398f,    -0.052055415f, -0.025676316f,
826                                                    0.03198509f,   -0.015951829f, -0.058556724f,   0.036879618f,
827                                                     0.043357447f,   0.028362012f,  -0.05908629f,  0.0059240665f,
828                                                    -0.04995891f, -0.019187413f,0.0276265f, -0.01628143f, 0.0025863599f,
829                                                    0.08800015f, 0.035250366f,   -0.022165963f, -0.07328642f,
830                                                    -0.009415526f,   -0.07455109f, 0.11690406f,    0.0363299f,
831                                                    0.07411125f,   0.042103454f,    -0.009660886f, 0.019076364f,
832                                                    0.018299393f, -0.046004917f, 0.08891175f,0.0431396f, -0.026327137f,
833                                                    -0.051502608f, 0.08979574f,   -0.051670972f,   0.04940282f,
834                                                     -0.07491107f,   -0.021240504f, 0.022596184f,  -0.034280192f,
835                                                    0.060163025f, -0.058211457f,  -0.051837247f, -0.01349775f,
836                                                    -0.04639988f,    -0.035936575f, -0.011681591f,  0.064818054f,
837                                                    0.0073146066f, -0.021745546f,   -0.043124277f, -0.06471268f,
838                                                    -0.07053354f,  -0.029321948f, -0.05330136f,    0.016933719f,
839                                                     -0.053782392f,  0.13747959f,   -0.1361751f,   -0.11569455f,
840                                                    0.0033329215f, 0.05693899f,    -0.053219706f, 0.063698f,
841                                                    0.07977434f,     -0.07924483f, 0.06936997f,    0.0034815092f,
842                                                    -0.007305279f, -0.037325785f,   -0.07251102f, -0.033633437f,
843                                                    -0.08677009f,  0.091591336f,  -0.14165086f,    0.021752775f,
844                                                     0.019683983f,   0.0011612234f, -0.058154266f, 0.049996935f,
845                                                    0.0288841f, -0.0024567875f, -0.14345716f, 0.010955264f,-0.10234828f,
846                                                    0.1183656f, -0.0010731248f, -0.023590032f,-0.072285876f,-0.0724771f,
847                                                    -0.026382286f, -0.0014920527f, 0.042667855f,  0.0018776858f,
848                                                    0.02986552f,     0.009814309f, 0.0733756f,     0.12289186f,
849                                                    0.018043943f,  -0.0458958f,     0.049412545f, 0.033632483f,
850                                                    0.05495232f,   0.036686596f,  -0.013781798f,   -0.010036754f,
851                                                     0.02576849f,    -0.08307328f,  0.010112348f,  0.042521734f,
852                                                    -0.05869831f, -0.071689695f, 0.03876447f, -0.13275425f, -0.0352966f,
853                                                    -0.023077697f, 0.10285965f,    0.084736146f,  0.15568255f,
854                                                    -0.00040734606f, 0.027835453f, -0.10292561f,   -0.032401145f,
855                                                    0.10053256f,   -0.026142767f,   -0.08271222f, -0.0030240538f,
856                                                    -0.016368777f, 0.1070414f,    0.042672627f,    0.013456989f,
857                                                     -0.0437609f,    -0.022309763f, 0.11576483f,   0.04108048f,
858                                                    0.061026827f, -0.0190714f,  -0.0869359f, 0.037901703f,  0.0610107f,
859                                                    0.07202949f, 0.01675338f,    0.086139716f,  -0.08795751f,
860                                                    -0.014898893f,   -0.023771819f, -0.01965048f,   0.007955471f,
861                                                    -0.043740474f, 0.03346837f,     -0.10549954f, 0.090567775f,
862                                                    0.042013682f,  -0.03176985f,  0.12569028f,     -0.02421228f,
863                                                     -0.029526481f,  0.023851605f,  0.031539805f,  0.05292009f,
864                                                    -0.02344001f, -0.07811758f,   -0.08834428f,  0.10094801f,
865                                                    0.16594367f,     -0.06861939f, -0.021256343f,  -0.041093912f,
866                                                    -0.06669611f,  0.035498552f,    0.021757556f, -0.09302526f,
867                                                    -0.015403468f, -0.06614931f,  -0.051798206f,   -0.013874718f,
868                                                     0.03630673f,    0.010412845f,  -0.08077351f,  0.046185967f,
869                                                    0.0035662893f, 0.03541868f,    -0.094149634f, -0.034814864f,
870                                                    0.003128424f,    -0.020674974f, -0.03944324f,   -0.008110165f,
871                                                    -0.11113267f,  0.08484226f,     0.043586485f, 0.040582247f,
872                                                    0.0968012f,    -0.065249965f, -0.028036479f,   0.0050708856f,
873                                                     0.0017462453f,  0.0326779f,    0.041296225f,  0.09164146f,
874                                                    -0.047743853f, -0.015952192f,  -0.034451712f, 0.084197424f,
875                                                    -0.05347844f,    -0.11768019f, 0.085926116f,   -0.08251791f,
876                                                    -0.045081906f, 0.0948852f,      0.068401024f, 0.024856757f,
877                                                    0.06978981f,   -0.057309967f, -0.012775832f,   -0.0032452994f,
878                                                     0.01977615f, -0.041040014f, -0.024264973f,0.063464895f, 0.05431621f
879             });
880 
881     auto cellToInputWeights =
882             MakeTensor<float, 1>(tensorInfo20, {0.040369894f, 0.030746894f,  0.24704495f,  0.018586371f, -0.037586458f,
883                                                 -0.15312155f, -0.11812848f,  -0.11465643f, 0.20259799f,   0.11418174f,
884                                                 -0.10116027f, -0.011334949f, 0.12411352f, -0.076769054f,-0.052169047f,
885                                                 0.21198851f,  -0.38871562f,  -0.09061183f, -0.09683246f,  -0.21929175f
886             });
887 
888 
889     auto cellToForgetWeights =
890             MakeTensor<float, 1>(tensorInfo20, {-0.01998659f,-0.15568835f,-0.24248174f,   -0.012770197f, 0.041331276f,
891                                                 -0.072311886f, -0.052123554f,-0.0066330447f,-0.043891653f,0.036225766f,
892                                                 -0.047248036f, 0.021479502f,0.033189066f, 0.11952997f,   -0.020432774f,
893                                                 0.64658105f,   -0.06650122f,  -0.03467612f,  0.095340036f, 0.23647355f
894             });
895 
896     auto cellToOutputWeights =
897             MakeTensor<float, 1>(tensorInfo20, {0.08286371f,  -0.08261836f, -0.51210177f, 0.002913762f, 0.17764764f,
898                                                 -0.5495371f,  -0.08460716f, -0.24552552f, 0.030037103f, 0.04123544f,
899                                                 -0.11940523f, 0.007358328f, 0.1890978f,   0.4833202f,   -0.34441817f,
900                                                 0.36312827f,  -0.26375428f, 0.1457655f,   -0.19724406f, 0.15548733f
901             });
902 
903     auto projectionWeights =
904             MakeTensor<float, 2>(tensorInfo16x20,
905                                  {-0.009802181f,  0.09401916f,    0.0717386f,     -0.13895074f,  0.09641832f,
906                                   0.060420845f,   0.08539281f,    0.054285463f,   0.061395317f,  0.034448683f,
907                                   -0.042991187f,  0.019801661f,   -0.16840284f,   -0.015726732f, -0.23041931f,
908                                   -0.024478018f,  -0.10959692f,   -0.013875541f,  0.18600968f,   -0.061274476f,
909                                   0.0138165f,     -0.08160894f,   -0.07661644f,   0.032372914f,  0.16169067f,
910                                   0.22465782f,    -0.03993472f,   -0.004017731f,  0.08633481f,   -0.28869787f,
911                                   0.08682067f,    0.17240396f,    0.014975425f,   0.056431185f,  0.031037588f,
912                                   0.16702051f,    0.0077946745f,  0.15140012f,    0.29405436f,   0.120285f,
913                                   -0.188994f,     -0.027265169f,  0.043389652f,   -0.022061434f, 0.014777949f,
914                                   -0.20203483f,   0.094781205f,   0.19100232f,    0.13987629f,   -0.036132768f,
915                                   -0.06426278f,   -0.05108664f,   0.13221376f,    0.009441198f,  -0.16715929f,
916                                   0.15859416f,    -0.040437475f,  0.050779544f,   -0.022187516f, 0.012166504f,
917                                   0.027685808f,   -0.07675938f,   -0.0055694645f, -0.09444123f,  0.0046453946f,
918                                   0.050794356f,   0.10770313f,    -0.20790008f,   -0.07149004f,  -0.11425117f,
919                                   0.008225835f,   -0.035802525f,  0.14374903f,    0.15262283f,   0.048710253f,
920                                   0.1847461f,     -0.007487823f,  0.11000021f,    -0.09542012f,  0.22619456f,
921                                   -0.029149994f,  0.08527916f,    0.009043713f,   0.0042746216f, 0.016261552f,
922                                   0.022461696f,   0.12689082f,    -0.043589946f,  -0.12035478f,  -0.08361797f,
923                                   -0.050666027f,  -0.1248618f,    -0.1275799f,    -0.071875185f, 0.07377272f,
924                                   0.09944291f,    -0.18897448f,   -0.1593054f,    -0.06526116f,  -0.040107165f,
925                                   -0.004618631f,  -0.067624845f,  -0.007576253f,  0.10727444f,   0.041546922f,
926                                   -0.20424393f,   0.06907816f,    0.050412357f,   0.00724631f,   0.039827548f,
927                                   0.12449835f,    0.10747581f,    0.13708383f,    0.09134148f,   -0.12617786f,
928                                   -0.06428341f,   0.09956831f,    0.1208086f,     -0.14676677f,  -0.0727722f,
929                                   0.1126304f,     0.010139365f,   0.015571211f,   -0.038128063f, 0.022913318f,
930                                   -0.042050496f,  0.16842307f,    -0.060597885f,  0.10531834f,   -0.06411776f,
931                                   -0.07451711f,   -0.03410368f,   -0.13393489f,   0.06534304f,   0.003620307f,
932                                   0.04490757f,    0.05970546f,    0.05197996f,    0.02839995f,   0.10434969f,
933                                   -0.013699693f,  -0.028353551f,  -0.07260381f,   0.047201227f,  -0.024575593f,
934                                   -0.036445823f,  0.07155557f,    0.009672501f,   -0.02328883f,  0.009533515f,
935                                   -0.03606021f,   -0.07421458f,   -0.028082801f,  -0.2678904f,   -0.13221288f,
936                                   0.18419984f,    -0.13012612f,   -0.014588381f,  -0.035059117f, -0.04824723f,
937                                   0.07830115f,    -0.056184657f,  0.03277091f,    0.025466874f,  0.14494097f,
938                                   -0.12522776f,   -0.098633975f,  -0.10766018f,   -0.08317623f,  0.08594209f,
939                                   0.07749552f,    0.039474737f,   0.1776665f,     -0.07409566f,  -0.0477268f,
940                                   0.29323658f,    0.10801441f,    0.1154011f,     0.013952499f,  0.10739139f,
941                                   0.10708251f,    -0.051456142f,  0.0074137426f,  -0.10430189f,  0.10034707f,
942                                   0.045594677f,   0.0635285f,     -0.0715442f,    -0.089667566f, -0.10811871f,
943                                   0.00026344223f, 0.08298446f,    -0.009525053f,  0.006585689f,  -0.24567553f,
944                                   -0.09450807f,   0.09648481f,    0.026996298f,   -0.06419476f,  -0.04752702f,
945                                   -0.11063944f,   -0.23441927f,   -0.17608605f,   -0.052156363f, 0.067035615f,
946                                   0.19271925f,    -0.0032889997f, -0.043264326f,  0.09663576f,   -0.057112187f,
947                                   -0.10100678f,   0.0628376f,     0.04447668f,    0.017961001f,  -0.10094388f,
948                                   -0.10190601f,   0.18335468f,    0.10494553f,    -0.052095775f, -0.0026118709f,
949                                   0.10539724f,    -0.04383912f,   -0.042349473f,  0.08438151f,   -0.1947263f,
950                                   0.02251204f,    0.11216432f,    -0.10307853f,   0.17351969f,   -0.039091777f,
951                                   0.08066188f,    -0.00561982f,   0.12633002f,    0.11335965f,   -0.0088127935f,
952                                   -0.019777594f,  0.06864014f,    -0.059751723f,  0.016233567f,  -0.06894641f,
953                                   -0.28651384f,   -0.004228674f,  0.019708522f,   -0.16305895f,  -0.07468996f,
954                                   -0.0855457f,    0.099339016f,   -0.07580735f,   -0.13775392f,  0.08434318f,
955                                   0.08330512f,    -0.12131499f,   0.031935584f,   0.09180414f,   -0.08876437f,
956                                   -0.08049874f,   0.008753825f,   0.03498998f,    0.030215185f,  0.03907079f,
957                                   0.089751154f,   0.029194152f,   -0.03337423f,   -0.019092513f, 0.04331237f,
958                                   0.04299654f,    -0.036394123f,  -0.12915532f,   0.09793732f,   0.07512415f,
959                                   -0.11319543f,   -0.032502122f,  0.15661901f,    0.07671967f,   -0.005491124f,
960                                   -0.19379048f,   -0.218606f,     0.21448623f,    0.017840758f,  0.1416943f,
961                                   -0.07051762f,   0.19488361f,    0.02664691f,    -0.18104725f,  -0.09334311f,
962                                   0.15026465f,    -0.15493552f,   -0.057762887f,  -0.11604192f,  -0.262013f,
963                                   -0.01391798f,   0.012185008f,   0.11156489f,    -0.07483202f,  0.06693364f,
964                                   -0.26151478f,   0.046425626f,   0.036540434f,   -0.16435726f,  0.17338543f,
965                                   -0.21401681f,   -0.11385144f,   -0.08283257f,   -0.069031075f, 0.030635102f,
966                                   0.010969227f,   0.11109743f,    0.010919218f,   0.027526086f,  0.13519906f,
967                                   0.01891392f,    -0.046839405f,  -0.040167913f,  0.017953383f,  -0.09700955f,
968                                   0.0061885654f,  -0.07000971f,   0.026893595f,   -0.038844477f, 0.14543656f
969                                  });
970 
971     std::vector<float> projectionBiasVector(outputSize, 0.f);
972     auto projectionBias = MakeTensor<float,1>(tensorInfo16, projectionBiasVector);
973 
974     armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo20x5);
975     armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo20x5);
976     armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo20x5);
977     armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo20x5);
978     armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo20x16);
979     armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo20x16);
980     armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo20x16);
981     armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo20x16);
982     armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo20);
983     armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo20);
984     armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo20);
985     armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo20);
986     armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo20);
987     armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo20);
988     armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo20);
989     armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo16x20);
990     armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo16);
991 
992     AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]);
993     AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
994     AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
995     AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
996     AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]);
997     AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
998     AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
999     AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
1000     AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]);
1001     AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]);
1002     AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
1003     AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
1004     AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
1005     AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]);
1006     AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]);
1007     AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]);
1008     AllocateAndCopyDataToITensorHandle(&projectionBiasTensor, &projectionBias[0]);
1009 
1010     data.m_InputToInputWeights = &inputToInputWeightsTensor;
1011     data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
1012     data.m_InputToCellWeights = &inputToCellWeightsTensor;
1013     data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
1014     data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
1015     data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
1016     data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
1017     data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
1018     data.m_CellToInputWeights = &cellToInputWeightsTensor;
1019     data.m_InputGateBias = &inputGateBiasTensor;
1020     data.m_ForgetGateBias = &forgetGateBiasTensor;
1021     data.m_CellBias = &cellBiasTensor;
1022     data.m_OutputGateBias = &outputGateBiasTensor;
1023     data.m_CellToForgetWeights = &cellToForgetWeightsTensor;
1024     data.m_CellToOutputWeights = &cellToOutputWeightsTensor;
1025     data.m_ProjectionWeights = &projectionWeightsTensor;
1026     data.m_ProjectionBias = &projectionBiasTensor;
1027 
1028     // Flags to set test configuration
1029     data.m_Parameters.m_ActivationFunc = 4;
1030     data.m_Parameters.m_CifgEnabled = false;
1031     data.m_Parameters.m_PeepholeEnabled = true;
1032     data.m_Parameters.m_ProjectionEnabled = true;
1033 
1034 
1035     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateLstm(data, info);
1036     inputHandle->Allocate();
1037     outputStateInHandle->Allocate();
1038     cellStateInHandle->Allocate();
1039 
1040     scratchHandle->Allocate();
1041     outputStateOutHandle->Allocate();
1042     cellStateOutHandle->Allocate();
1043     outputHandle->Allocate();
1044 
1045     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
1046     CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
1047     CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
1048 
1049     workload->Execute();
1050 
1051     CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
1052 
1053     return ret;
1054 
1055 }
1056 
1057 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,const boost::multi_array<T,2> & input,const boost::multi_array<T,2> & outputExpected,float qScale=0.0f,int32_t qOffset=0,armnn::DataType constantDataType=armnn::DataType::Float32)1058 LayerTestResult<T, 2> LstmLayerWithCifgWithPeepholeNoProjectionTestImpl(
1059         armnn::IWorkloadFactory& workloadFactory,
1060         const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1061         const armnn::ITensorHandleFactory& tensorHandleFactory,
1062         const boost::multi_array<T, 2>& input,
1063         const boost::multi_array<T, 2>& outputExpected,
1064         float qScale = 0.0f,
1065         int32_t qOffset = 0,
1066         armnn::DataType constantDataType = armnn::DataType::Float32)
1067 {
1068     IgnoreUnused(memoryManager);
1069     bool cifgEnabled = true;
1070     bool peepholeEnabled = true;
1071     bool projectionEnabled = false;
1072     // These are not the input and the output of Lstm yet
1073     unsigned int batchSize = armnn::numeric_cast<unsigned int>(input.shape()[0]);
1074     unsigned int inputSize = armnn::numeric_cast<unsigned int>(input.shape()[1]);
1075 
1076     unsigned int outputSize = armnn::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1077 
1078     const unsigned int cellSize = outputSize;
1079 
1080     // Decide the shape of all input tensors
1081     armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, ArmnnType, qScale, qOffset); // change to ArmnnType
1082     armnn::TensorInfo outputStateInTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
1083     armnn::TensorInfo cellStateInTensorInfo({batchSize, cellSize}, ArmnnType, qScale, qOffset);
1084 
1085     unsigned int scratchBufferSize = cifgEnabled ? cellSize * 3 : cellSize * 4;
1086     armnn::TensorInfo scratchBufferTensorInfo({batchSize, scratchBufferSize}, ArmnnType, qScale, qOffset);
1087     armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
1088     armnn::TensorInfo cellStateOutTensorInfo({batchSize, cellSize}, ArmnnType, qScale, qOffset);
1089     armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
1090 
1091     // List of inputs
1092     std::vector<float> inputData;
1093     inputData.assign(input.data(), input.data() + batchSize*inputSize);
1094     auto inputTensor = MakeTensor<float,2>(inputTensorInfo, inputData);
1095 
1096     std::vector<float> outputStateInVector(batchSize * outputSize, 0.f);
1097     auto outputStateInTensor = MakeTensor<float, 2>(outputStateInTensorInfo, outputStateInVector);
1098 
1099     std::vector<float> cellStateInVector(batchSize * cellSize, 0.f);
1100     auto cellStateInTensor = MakeTensor<float, 2>(cellStateInTensorInfo, cellStateInVector);
1101 
1102 
1103     // Prepare all the weights in the descriptor for LSTM
1104     armnn::LstmQueueDescriptor data;
1105     armnn::TensorInfo tensorInfoInput({cellSize, inputSize}, constantDataType, qScale, qOffset);
1106     armnn::TensorInfo tensorInfoOutput({cellSize, outputSize}, constantDataType, qScale, qOffset);
1107     armnn::TensorInfo tensorInfoNumUnits({cellSize}, constantDataType, qScale, qOffset);
1108 
1109     auto inputToCellWeights = MakeTensor<float, 2>(tensorInfoInput,
1110                                                      {-0.49770179f, -0.27711356f, -0.09624726f, 0.05100781f,
1111                                                      0.04717243f, 0.48944736f, -0.38535351f,
1112                                                      -0.17212132f});
1113     auto inputToForgetWeights = MakeTensor<float, 2>(tensorInfoInput,
1114                                                      {-0.55291498f, -0.42866567f, 0.13056988f,
1115                                                        -0.3633365f, -0.22755712f, 0.28253698f, 0.24407166f,
1116                                                        0.33826375f});
1117     auto inputToOutputWeights = MakeTensor<float, 2>(tensorInfoInput,
1118                                                      {0.10725588f, -0.02335852f, -0.55932593f,
1119                                                        -0.09426838f, -0.44257352f, 0.54939759f,
1120                                                        0.01533556f, 0.42751634f});
1121     auto cellBias = MakeTensor<float, 1>(tensorInfoNumUnits, {0.f, 0.f, 0.f, 0.f});
1122     auto forgetGateBias = MakeTensor<float, 1>(tensorInfoNumUnits, {1.f, 1.f, 1.f, 1.f});
1123     auto outputGateBias = MakeTensor<float, 1>(tensorInfoNumUnits, {0.f, 0.f, 0.f, 0.f});
1124 
1125     auto recurrentToCellWeights = MakeTensor<float, 2>(tensorInfoOutput,
1126                 {0.54066205f, -0.32668582f, -0.43562764f, -0.56094903f, 0.42957711f,
1127                  0.01841056f, -0.32764608f, -0.33027974f, -0.10826075f, 0.20675004f,
1128                  0.19069612f, -0.03026325f, -0.54532051f, 0.33003211f, 0.44901288f,
1129                  0.21193194f});
1130     auto recurrentToForgetWeights = MakeTensor<float, 2>(tensorInfoOutput,
1131                  {-0.13832897f, -0.0515101f, -0.2359007f, -0.16661474f, -0.14340827f,
1132                   0.36986142f, 0.23414481f, 0.55899f, 0.10798943f, -0.41174671f, 0.17751795f,
1133                   -0.34484994f, -0.35874045f, -0.11352962f, 0.27268326f, 0.54058349f});
1134 
1135     auto recurrentToOutputWeights = MakeTensor<float, 2>(tensorInfoOutput,
1136                 {0.41613156f, 0.42610586f, -0.16495961f, -0.5663873f, 0.30579174f, -0.05115908f,
1137                  -0.33941799f, 0.23364776f, 0.11178309f, 0.09481031f, -0.26424935f, 0.46261835f,
1138                  0.50248802f, 0.26114327f, -0.43736315f, 0.33149987f});
1139 
1140     auto cellToForgetWeights = MakeTensor<float, 1>(tensorInfoNumUnits,
1141                 {0.47485286f, -0.51955009f, -0.24458408f, 0.31544167f});
1142     auto cellToOutputWeights = MakeTensor<float, 1>(tensorInfoNumUnits,
1143                 {-0.17135078f, 0.82760304f, 0.85573703f, -0.77109635f});
1144 
1145     armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfoInput);
1146     armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfoInput);
1147     armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfoInput);
1148 
1149     armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfoNumUnits);
1150     armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfoNumUnits);
1151     armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfoNumUnits);
1152 
1153     armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfoOutput);
1154     armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfoOutput);
1155     armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfoOutput);
1156 
1157 
1158     armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfoNumUnits);
1159     armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfoNumUnits);
1160 
1161     AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
1162     AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
1163     AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
1164 
1165     AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
1166     AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
1167     AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
1168 
1169     AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
1170     AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
1171     AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
1172 
1173     AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]);
1174     AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]);
1175 
1176 
1177     data.m_InputToCellWeights = &inputToCellWeightsTensor;
1178     data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
1179     data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
1180 
1181     data.m_CellBias = &cellBiasTensor;
1182     data.m_ForgetGateBias = &forgetGateBiasTensor;
1183     data.m_OutputGateBias = &outputGateBiasTensor;
1184 
1185     data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
1186     data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
1187     data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
1188 
1189     data.m_CellToForgetWeights = &cellToForgetWeightsTensor;
1190     data.m_CellToOutputWeights = &cellToOutputWeightsTensor;
1191 
1192     // other parameters for the descriptor
1193     data.m_Parameters.m_CifgEnabled = cifgEnabled;
1194     data.m_Parameters.m_ProjectionEnabled = projectionEnabled;
1195     data.m_Parameters.m_PeepholeEnabled = peepholeEnabled;
1196 
1197     data.m_Parameters.m_ActivationFunc = 4;
1198     data.m_Parameters.m_ClippingThresProj = 0.0;
1199     data.m_Parameters.m_ClippingThresCell = 0.0;
1200 
1201 
1202     // List of outputs
1203     std::vector<T> scratchBufferVector(batchSize * scratchBufferSize, T());
1204     auto scratchBufferTensor = MakeTensor<T,2>(scratchBufferTensorInfo, scratchBufferVector);
1205     LayerTestResult<T, 2> ret0(scratchBufferTensorInfo);
1206 
1207     // Output state for a certain time step
1208     std::vector<T> outputStateOutVector(batchSize * outputSize, T());
1209     auto outputStateOutTensor = MakeTensor<T,2>(outputStateOutTensorInfo, outputStateOutVector);
1210     LayerTestResult<T, 2> ret1(outputStateOutTensorInfo);
1211 
1212     // Cell state for a certain time step
1213     std::vector<T> cellStateOutVector(batchSize * cellSize, T());
1214     auto cellStateOutTensor = MakeTensor<T,2>(cellStateOutTensorInfo, cellStateOutVector);
1215     LayerTestResult<T, 2> ret2(cellStateOutTensorInfo);
1216 
1217     // Output for a certain time step
1218     std::vector<T> outputVector(batchSize * outputSize, T());
1219     auto outputTensor = MakeTensor<T, 2>(outputTensorInfo, outputVector);
1220     std::vector<T> outputData;
1221     outputData.assign(outputExpected.data(), outputExpected.data() + batchSize*outputSize);
1222     LayerTestResult<T, 2> ret3(outputTensorInfo);
1223     ret3.outputExpected = MakeTensor<T, 2>(outputTensorInfo, outputData);
1224 
1225     // Prepare the inputs and outputs for the workload
1226     std::unique_ptr<armnn::ITensorHandle> inputHandle =
1227             tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
1228     std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
1229             tensorHandleFactory.CreateTensorHandle(outputStateInTensorInfo);
1230     std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
1231             tensorHandleFactory.CreateTensorHandle(cellStateInTensorInfo);
1232 
1233     std::unique_ptr<armnn::ITensorHandle> scratchBufferHandle =
1234             tensorHandleFactory.CreateTensorHandle(scratchBufferTensorInfo);
1235     std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle =
1236             tensorHandleFactory.CreateTensorHandle(outputStateOutTensorInfo);
1237     std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
1238             tensorHandleFactory.CreateTensorHandle(cellStateOutTensorInfo);
1239     std::unique_ptr<armnn::ITensorHandle> outputHandle =
1240             tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
1241 
1242     armnn::WorkloadInfo info;
1243     AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
1244     AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get());
1245     AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get());
1246 
1247     AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchBufferHandle.get());
1248     AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get());
1249     AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get());
1250     AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());
1251 
1252     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateLstm(data, info);
1253 
1254 
1255     inputHandle->Allocate();
1256     outputStateInHandle->Allocate();
1257     cellStateInHandle->Allocate();
1258 
1259     scratchBufferHandle->Allocate();
1260     outputStateOutHandle->Allocate();
1261     cellStateOutHandle->Allocate();
1262     outputHandle->Allocate();
1263 
1264 
1265     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
1266     CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
1267     CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
1268 
1269     CopyDataToITensorHandle(scratchBufferHandle.get(), &scratchBufferTensor[0][0]);
1270     CopyDataToITensorHandle(outputStateOutHandle.get(), &outputStateOutTensor[0][0]);
1271     CopyDataToITensorHandle(cellStateOutHandle.get(), &cellStateOutTensor[0][0]);
1272 
1273     workload->Execute();
1274 
1275     CopyDataFromITensorHandle(&ret0.output[0][0], scratchBufferHandle.get());
1276     CopyDataFromITensorHandle(&ret1.output[0][0], outputStateOutHandle.get());
1277     CopyDataFromITensorHandle(&ret2.output[0][0], cellStateOutHandle.get());
1278     CopyDataFromITensorHandle(&ret3.output[0][0], outputHandle.get());
1279 
1280     return ret3;
1281 }
1282 
//
// Runs a single-step LSTM workload with CIFG disabled and peephole, projection
// and layer normalisation all enabled, using fixed in-function weights/biases.
//
// Shapes are hard-coded: batchSize=2, inputSize=5, numUnits=4, outputSize=3.
// The caller supplies the input activations and the expected final output
// (both [batchSize, outputSize/inputSize]); all constant tensors (weights,
// biases, layer-norm scales) are defined inline below.
//
// Template parameters:
//   ArmnnType - data type of the variable (input/output/state) tensors.
//   T         - C++ type resolved from ArmnnType.
// Parameters:
//   workloadFactory     - backend factory used to create the LSTM workload.
//   memoryManager       - unused here (kept for a uniform test-impl signature).
//   tensorHandleFactory - creates the input/output ITensorHandles.
//   input               - [batchSize, inputSize] input activations.
//   outputExpected      - [batchSize, outputSize] reference output.
//   qScale/qOffset      - quantization info applied to every TensorInfo.
//   constantDataType    - data type of the constant (weight/bias) tensors,
//                         allowing them to differ from the variable tensors.
// Returns:
//   LayerTestResult comparing only the final output tensor; the scratch
//   buffer and the state outputs are allocated but not checked.
//
template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
LayerTestResult<T, 2>
LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl(armnn::IWorkloadFactory& workloadFactory,
                                                  const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
                                                  const armnn::ITensorHandleFactory& tensorHandleFactory,
                                                  const boost::multi_array<T, 2>& input,
                                                  const boost::multi_array<T, 2>& outputExpected,
                                                  float qScale = 0.0f,
                                                  int32_t qOffset = 0,
                                                  armnn::DataType constantDataType = armnn::DataType::Float32)
{
    IgnoreUnused(memoryManager);
    // Fixed problem dimensions for this test case.
    unsigned int batchSize = 2;
    unsigned int outputSize = 3;
    unsigned int inputSize = 5;
    unsigned numUnits = 4;

    // Tensor infos for the three network inputs (input, previous output
    // state, previous cell state).
    armnn::TensorInfo inputTensorInfo({batchSize , inputSize}, ArmnnType, qScale, qOffset);
    armnn::TensorInfo cellStateInTensorInfo({batchSize , numUnits}, ArmnnType, qScale, qOffset);
    armnn::TensorInfo outputStateInTensorInfo({batchSize , outputSize}, ArmnnType, qScale, qOffset);

    // Scratch buffer size without CIFG [batchSize, numUnits * 4]
    armnn::TensorInfo scratchBufferTensorInfo({batchSize, numUnits * 4}, ArmnnType, qScale, qOffset);
    armnn::TensorInfo cellStateOutTensorInfo({batchSize, numUnits}, ArmnnType, qScale, qOffset);
    armnn::TensorInfo outputStateOutTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);
    armnn::TensorInfo outputTensorInfo({batchSize, outputSize}, ArmnnType, qScale, qOffset);

    LayerTestResult<T, 2> ret(outputTensorInfo);

    // Copy caller-provided input into a tensor; both state inputs start
    // zero-initialised (first time step).
    std::vector<float> inputVector;
    inputVector.assign(input.data(), input.data() + (batchSize * inputSize));
    auto inputTensor = MakeTensor<float,2>(inputTensorInfo, inputVector);

    std::vector<float> cellStateInVector(batchSize * numUnits, 0.f);
    auto cellStateInTensor = MakeTensor<float,2>(cellStateInTensorInfo, cellStateInVector);

    std::vector<float> outputStateInVector(batchSize * outputSize, 0.f);
    auto outputStateInTensor = MakeTensor<float,2>(outputStateInTensorInfo, outputStateInVector);

    // Placeholders for the workload outputs (contents are produced by Execute).
    std::vector<float> scratchBufferVector(batchSize * numUnits * 4, 0.f);
    auto scratchBufferTensor = MakeTensor<float,2>(scratchBufferTensorInfo, scratchBufferVector);

    std::vector<float> outputStateOutVector(batchSize * outputSize, 0.f);
    auto outputStateOutTensor = MakeTensor<float,2>(outputStateOutTensorInfo, outputStateOutVector);

    std::vector<float> cellStateOutVector(batchSize * numUnits, 0.f);
    auto cellStateOutTensor = MakeTensor<float,2>(cellStateOutTensorInfo, cellStateOutVector);

    // Expected output supplied by the caller.
    std::vector<float> outputVector;
    outputVector.assign(outputExpected.data(), outputExpected.data() + (batchSize * outputSize));
    ret.outputExpected = MakeTensor<float, 2>(outputTensorInfo, outputVector);

    // Create backend tensor handles for all inputs and outputs.
    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
            tensorHandleFactory.CreateTensorHandle(cellStateInTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
            tensorHandleFactory.CreateTensorHandle(outputStateInTensorInfo);

    std::unique_ptr<armnn::ITensorHandle> scratchHandle =
            tensorHandleFactory.CreateTensorHandle(scratchBufferTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle =
            tensorHandleFactory.CreateTensorHandle(outputStateOutTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
            tensorHandleFactory.CreateTensorHandle(cellStateOutTensorInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);

    armnn::LstmQueueDescriptor data;
    armnn::WorkloadInfo info;

    // Bind tensors to the workload; the order of AddInput/AddOutput calls
    // defines the LSTM workload's input/output slot indices.
    AddInputToWorkload(data, info, inputTensorInfo, inputHandle.get());
    AddInputToWorkload(data, info, outputStateInTensorInfo, outputStateInHandle.get());
    AddInputToWorkload(data, info, cellStateInTensorInfo, cellStateInHandle.get());

    AddOutputToWorkload(data, info, scratchBufferTensorInfo, scratchHandle.get());
    AddOutputToWorkload(data, info, outputStateOutTensorInfo, outputStateOutHandle.get());
    AddOutputToWorkload(data, info, cellStateOutTensorInfo, cellStateOutHandle.get());
    AddOutputToWorkload(data, info, outputTensorInfo, outputHandle.get());

    // Shapes of the constant tensors, named after their dimensions
    // (4 = numUnits, 5 = inputSize, 3 = outputSize).
    armnn::TensorInfo tensorInfo3({outputSize}, constantDataType, qScale, qOffset);
    armnn::TensorInfo tensorInfo4({numUnits}, constantDataType, qScale, qOffset);
    armnn::TensorInfo tensorInfo4x5({numUnits, inputSize}, constantDataType, qScale, qOffset);
    armnn::TensorInfo tensorInfo4x3({numUnits, outputSize}, constantDataType, qScale, qOffset);
    armnn::TensorInfo tensorInfo3x4({outputSize, numUnits}, constantDataType, qScale, qOffset);

    // Fixed test weights/biases. Input gate tensors are present because CIFG
    // is disabled in this configuration.
    auto inputToInputWeights =
            MakeTensor<float, 2>(tensorInfo4x5, { 0.5f,  0.6f,  0.7f, -0.8f, -0.9f,
                                                  0.1f,  0.2f,  0.3f, -0.4f,  0.5f,
                                                 -0.8f,  0.7f, -0.6f,  0.5f, -0.4f,
                                                 -0.5f, -0.4f, -0.3f, -0.2f, -0.1f});  //{numUnits, inputSize}

    auto inputToForgetWeights =
            MakeTensor<float, 2>(tensorInfo4x5, {-0.6f, -0.1f,  0.3f,  0.2f,  0.9f,
                                                 -0.5f, -0.2f, -0.4f,  0.3f, -0.8f,
                                                 -0.4f,  0.3f, -0.5f, -0.4f, -0.6f,
                                                  0.3f, -0.4f, -0.6f, -0.5f, -0.5f});  //{numUnits, inputSize}

    auto inputToCellWeights =
            MakeTensor<float, 2>(tensorInfo4x5, {-0.4f, -0.3f, -0.2f, -0.1f, -0.5f,
                                                  0.5f, -0.2f, -0.3f, -0.2f, -0.6f,
                                                  0.6f, -0.1f, -0.4f, -0.3f, -0.7f,
                                                  0.7f, -0.9f, -0.5f,  0.8f,  0.6f});  //{numUnits, inputSize}

    auto inputToOutputWeights =
            MakeTensor<float, 2>(tensorInfo4x5, {-0.8f, -0.4f, -0.2f, -0.9f, -0.1f,
                                                 -0.7f,  0.3f, -0.3f, -0.8f, -0.2f,
                                                  0.6f, -0.2f,  0.4f, -0.7f, -0.3f,
                                                 -0.5f,  0.1f,  0.5f, -0.6f, -0.4f}); //{numUnits, inputSize}

    auto inputGateBias =
            MakeTensor<float, 1>(tensorInfo4, {0.03f, 0.15f, 0.22f, 0.38f});  //{numUnits}

    auto forgetGateBias =
            MakeTensor<float, 1>(tensorInfo4, {0.1f, -0.3f, -0.2f, 0.1f});    //{numUnits}

    auto cellBias =
            MakeTensor<float, 1>(tensorInfo4, {-0.05f, 0.72f, 0.25f, 0.08f}); //{numUnits}

    auto outputGateBias =
            MakeTensor<float, 1>(tensorInfo4, {0.05f, -0.01f, 0.2f, 0.1f});   //{numUnits}

    auto recurrentToInputWeights =
            MakeTensor<float, 2>(tensorInfo4x3, {-0.2f, -0.3f,  0.4f,
                                                  0.1f, -0.5f,  0.9f,
                                                 -0.2f, -0.3f, -0.7f,
                                                 0.05f, -0.2f, -0.6f});  //{numUnits, outputSize}

    auto recurrentToCellWeights =
            MakeTensor<float, 2>(tensorInfo4x3, {-0.3f,  0.2f,   0.1f,
                                                 -0.3f,  0.8f, -0.08f,
                                                 -0.2f,  0.3f,   0.8f,
                                                 -0.6f, -0.1f,   0.2f}); //{numUnits, outputSize}

    auto recurrentToForgetWeights =
            MakeTensor<float, 2>(tensorInfo4x3, {-0.5f, -0.3f, -0.5f,
                                                 -0.2f,  0.6f,  0.4f,
                                                  0.9f,  0.3f, -0.1f,
                                                  0.2f,  0.5f,  0.2f});  //{numUnits, outputSize}

    auto recurrentToOutputWeights =
            MakeTensor<float, 2>(tensorInfo4x3, { 0.3f, -0.1f,  0.1f,
                                                 -0.2f, -0.5f, -0.7f,
                                                 -0.2f, -0.6f, -0.1f,
                                                 -0.4f, -0.7f, -0.2f});  //{numUnits, outputSize}

    // Peephole (cell-to-gate) weights — exercised because PeepholeEnabled
    // is set below.
    auto cellToInputWeights =
            MakeTensor<float, 1>(tensorInfo4, {0.05f, 0.1f, 0.25f, 0.15f});      //{numUnits}

    auto cellToForgetWeights =
            MakeTensor<float, 1>(tensorInfo4, {-0.02f, -0.15f, -0.25f, -0.03f}); //{numUnits}

    auto cellToOutputWeights =
            MakeTensor<float, 1>(tensorInfo4, {0.1f, -0.1f, -0.5f, 0.05f});      //{numUnits}

    // Projection layer maps numUnits -> outputSize; projection bias is zero.
    auto projectionWeights =
            MakeTensor<float, 2>(tensorInfo3x4,
                                 {-0.1f, 0.2f, 0.01f, -0.2f,
                                   0.1f, 0.5f,  0.3f, 0.08f,
                                  0.07f, 0.2f, -0.4f,  0.2f}); //{outputSize, numUnits}

    std::vector<float> projectionBiasVector(outputSize, 0.f);
    auto projectionBias = MakeTensor<float,1>(tensorInfo3, projectionBiasVector); //{outputSize}

    // Per-gate layer-normalisation scale weights.
    auto inputLayerNormWeights =
            MakeTensor<float, 1>(tensorInfo4, {0.1f, 0.2f, 0.3f, 0.5f}); //{numUnits}

    auto forgetLayerNormWeights =
            MakeTensor<float, 1>(tensorInfo4, {0.2f, 0.2f, 0.4f, 0.3f}); //{numUnits}

    auto cellLayerNormWeights =
            MakeTensor<float, 1>(tensorInfo4, {0.7f, 0.2f, 0.3f, 0.8f}); //{numUnits}

    auto outputLayerNormWeights =
            MakeTensor<float, 1>(tensorInfo4, {0.6f, 0.2f, 0.2f, 0.5f}); //{numUnits}


    // Scoped handles own the constant-tensor storage for the lifetime of
    // the workload execution below.
    armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(tensorInfo4x5);
    armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(tensorInfo4x5);
    armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(tensorInfo4x5);
    armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(tensorInfo4x5);
    armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(tensorInfo4x3);
    armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(tensorInfo4x3);
    armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(tensorInfo4x3);
    armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(tensorInfo4x3);
    armnn::ScopedCpuTensorHandle cellToInputWeightsTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle inputGateBiasTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle forgetGateBiasTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle cellBiasTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle outputGateBiasTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle cellToForgetWeightsTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle cellToOutputWeightsTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle projectionWeightsTensor(tensorInfo3x4);
    armnn::ScopedCpuTensorHandle projectionBiasTensor(tensorInfo3);

    armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(tensorInfo4);
    armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(tensorInfo4);

    // Fill the scoped handles with the constant data defined above.
    AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&cellToInputWeightsTensor, &cellToInputWeights[0]);
    AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]);
    AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
    AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
    AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
    AllocateAndCopyDataToITensorHandle(&cellToForgetWeightsTensor, &cellToForgetWeights[0]);
    AllocateAndCopyDataToITensorHandle(&cellToOutputWeightsTensor, &cellToOutputWeights[0]);
    AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&projectionBiasTensor, &projectionBias[0]);

    AllocateAndCopyDataToITensorHandle(&inputLayerNormWeightsTensor, &inputLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]);

    // Wire the constant tensors into the queue descriptor. The descriptor
    // holds raw pointers, so the scoped handles above must outlive Execute().
    data.m_InputToInputWeights = &inputToInputWeightsTensor;
    data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
    data.m_InputToCellWeights = &inputToCellWeightsTensor;
    data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
    data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
    data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
    data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
    data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
    data.m_CellToInputWeights = &cellToInputWeightsTensor;
    data.m_InputGateBias = &inputGateBiasTensor;
    data.m_ForgetGateBias = &forgetGateBiasTensor;
    data.m_CellBias = &cellBiasTensor;
    data.m_OutputGateBias = &outputGateBiasTensor;
    data.m_CellToForgetWeights = &cellToForgetWeightsTensor;
    data.m_CellToOutputWeights = &cellToOutputWeightsTensor;
    data.m_ProjectionWeights = &projectionWeightsTensor;
    data.m_ProjectionBias = &projectionBiasTensor;

    data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor;
    data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
    data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
    data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;

    // Flags to set test configuration
    // NOTE(review): 4 appears to select TanH in ArmNN's LSTM activation
    // encoding (matching the NNAPI/TfLite LSTM convention) — confirm
    // against the LstmDescriptor documentation.
    data.m_Parameters.m_ActivationFunc = 4;
    data.m_Parameters.m_CifgEnabled = false;
    data.m_Parameters.m_PeepholeEnabled = true;
    data.m_Parameters.m_ProjectionEnabled = true;
    data.m_Parameters.m_LayerNormEnabled = true;


    // Create the workload, then allocate and fill the variable tensors.
    // Only the three network inputs need data; the four outputs are
    // allocated and written by Execute().
    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateLstm(data, info);
    inputHandle->Allocate();
    outputStateInHandle->Allocate();
    cellStateInHandle->Allocate();

    scratchHandle->Allocate();
    outputStateOutHandle->Allocate();
    cellStateOutHandle->Allocate();
    outputHandle->Allocate();

    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
    CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
    CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);

    workload->Execute();

    // Only the final output tensor is read back and compared by the caller.
    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());

    return ret;
}
1555 
QuantizedLstmTestImpl(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory,const boost::multi_array<uint8_t,2> & input,const boost::multi_array<uint8_t,2> & outputExpected)1556 LayerTestResult<uint8_t, 2> QuantizedLstmTestImpl(
1557     armnn::IWorkloadFactory& workloadFactory,
1558     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
1559     const armnn::ITensorHandleFactory& tensorHandleFactory,
1560     const boost::multi_array<uint8_t, 2>& input,
1561     const boost::multi_array<uint8_t, 2>& outputExpected)
1562 {
1563     IgnoreUnused(memoryManager);
1564     auto numBatches = armnn::numeric_cast<unsigned int>(input.shape()[0]);
1565     auto inputSize = armnn::numeric_cast<unsigned int>(input.shape()[1]);
1566     auto outputSize = armnn::numeric_cast<unsigned int>(outputExpected.shape()[1]);
1567 
1568     // Scale/Offset for input/output, cellState In/Out, weights, bias
1569     float inputOutputScale = 0.0078125f;
1570     int32_t inputOutputOffset = 128;
1571 
1572     float cellStateScale = 0.00048828125f;
1573     int32_t cellStateOffset = 0;
1574 
1575     float weightsScale = 0.00408021f;
1576     int32_t weightsOffset = 100;
1577 
1578     float biasScale = 3.1876640625e-05f;
1579     int32_t biasOffset = 0;
1580 
1581     // Input/Output tensor info
1582     armnn::TensorInfo inputInfo({numBatches , inputSize},
1583                                  armnn::DataType::QAsymmU8,
1584                                  inputOutputScale,
1585                                  inputOutputOffset);
1586 
1587     armnn::TensorInfo cellStateInfo({numBatches , outputSize},
1588                                      armnn::DataType::QSymmS16,
1589                                      cellStateScale,
1590                                      cellStateOffset);
1591 
1592     armnn::TensorInfo outputStateInfo({numBatches , outputSize},
1593                                        armnn::DataType::QAsymmU8,
1594                                        inputOutputScale,
1595                                        inputOutputOffset);
1596 
1597     LayerTestResult<uint8_t, 2> ret(outputStateInfo);
1598 
1599     // Input0
1600     std::vector<uint8_t> inputVector;
1601     inputVector.assign(input.data(), input.data() + (numBatches * inputSize));
1602     auto inputTensor = MakeTensor<uint8_t, 2>(inputInfo, inputVector);
1603 
1604     // Input1
1605     std::vector<int16_t> cellStateInVector   = {876, 1034, 955, -909, 761, 1029, 796, -1036}; // 13
1606     auto cellStateInTensor   = MakeTensor<int16_t, 2>(cellStateInfo, cellStateInVector);
1607 
1608     // Input2
1609     std::vector<uint8_t> outputStateInVector = {136, 150, 140, 115, 135, 152, 138, 112}; // 14
1610     auto outputStateInTensor = MakeTensor<uint8_t, 2>(outputStateInfo, outputStateInVector);
1611 
1612     // Output0
1613     std::vector<int16_t> cellStateOutVector  = {1485, 1177, 1373, -1023, 1019, 1355, 1097, -1235}; // 0
1614     auto cellStateOutTensor  = MakeTensor<int16_t, 2>(cellStateInfo, cellStateOutVector);
1615 
1616     // Output1
1617     std::vector<uint8_t> outputVector; // 1
1618     outputVector.assign(outputExpected.data(), outputExpected.data() + (numBatches * outputSize));
1619     ret.outputExpected = MakeTensor<uint8_t, 2>(outputStateInfo, outputVector);
1620 
1621     // Create tensor handles
1622     std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
1623     std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
1624             tensorHandleFactory.CreateTensorHandle(cellStateInfo);
1625     std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
1626             tensorHandleFactory.CreateTensorHandle(outputStateInfo);
1627 
1628     std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
1629             tensorHandleFactory.CreateTensorHandle(cellStateInfo);
1630     std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputStateInfo);
1631 
1632     armnn::QuantizedLstmQueueDescriptor data;
1633     armnn::WorkloadInfo info;
1634 
1635     // Add inputs and outputs to workload
1636     AddInputToWorkload(data, info, inputInfo, inputHandle.get());
1637     AddInputToWorkload(data, info, cellStateInfo, cellStateInHandle.get());
1638     AddInputToWorkload(data, info, outputStateInfo, outputStateInHandle.get());
1639 
1640     AddOutputToWorkload(data, info, cellStateInfo, cellStateOutHandle.get());
1641     AddOutputToWorkload(data, info, outputStateInfo, outputHandle.get());
1642 
1643     // Weights and bias tensor and quantization info
1644     armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
1645                                         armnn::DataType::QAsymmU8,
1646                                         weightsScale,
1647                                         weightsOffset);
1648 
1649     armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
1650                                             armnn::DataType::QAsymmU8,
1651                                             weightsScale,
1652                                             weightsOffset);
1653 
1654     armnn::TensorInfo biasInfo({outputSize}, armnn::DataType::Signed32, biasScale, biasOffset);
1655 
1656     // Weights and bias tensor data
1657     auto inputToInputWeights  = MakeTensor<uint8_t, 2>(inputWeightsInfo, {146, 250, 235, 171, 10, 218, 171, 108});
1658     auto inputToForgetWeights = MakeTensor<uint8_t, 2>(inputWeightsInfo, {24, 50, 132, 179, 158, 110, 3, 169});
1659     auto inputToCellWeights   = MakeTensor<uint8_t, 2>(inputWeightsInfo, {133, 34, 29, 49, 206, 109, 54, 183});
1660     auto inputToOutputWeights = MakeTensor<uint8_t, 2>(inputWeightsInfo, {195, 187, 11, 99, 109, 10, 218, 48});
1661 
1662     auto recurrentToInputWeights  = MakeTensor<uint8_t, 2>(recurrentWeightsInfo,
1663             {254, 206, 77, 168, 71, 20, 215, 6, 223, 7, 118, 225, 59, 130, 174, 26});
1664     auto recurrentToForgetWeights = MakeTensor<uint8_t, 2>(recurrentWeightsInfo,
1665             {137, 240, 103, 52, 68, 51, 237, 112, 0, 220, 89, 23, 69, 4, 207, 253});
1666     auto recurrentToCellWeights   = MakeTensor<uint8_t, 2>(recurrentWeightsInfo,
1667             {172, 60, 205, 65, 14, 0, 140, 168, 240, 223, 133, 56, 142, 64, 246, 216});
1668     auto recurrentToOutputWeights = MakeTensor<uint8_t, 2>(recurrentWeightsInfo,
1669             {106, 214, 67, 23, 59, 158, 45, 3, 119, 132, 49, 205, 129, 218, 11, 98});
1670 
1671     auto inputGateBias  = MakeTensor<int32_t, 1>(biasInfo, {-7876, 13488, -726, 32839});
1672     auto forgetGateBias = MakeTensor<int32_t, 1>(biasInfo, {9206, -46884, -11693, -38724});
1673     auto cellBias       = MakeTensor<int32_t, 1>(biasInfo, {39481, 48624, 48976, -21419});
1674     auto outputGateBias = MakeTensor<int32_t, 1>(biasInfo, {-58999, -17050, -41852, -40538});
1675 
1676     // ScopedCpuTensorHandles
1677     armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(inputWeightsInfo);
1678     armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(inputWeightsInfo);
1679     armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(inputWeightsInfo);
1680     armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(inputWeightsInfo);
1681 
1682     armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(recurrentWeightsInfo);
1683     armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(recurrentWeightsInfo);
1684     armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(recurrentWeightsInfo);
1685     armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(recurrentWeightsInfo);
1686 
1687     armnn::ScopedCpuTensorHandle inputGateBiasTensor(biasInfo);
1688     armnn::ScopedCpuTensorHandle forgetGateBiasTensor(biasInfo);
1689     armnn::ScopedCpuTensorHandle cellBiasTensor(biasInfo);
1690     armnn::ScopedCpuTensorHandle outputGateBiasTensor(biasInfo);
1691 
1692     // Allocate and copy data
1693     AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]);
1694     AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
1695     AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
1696     AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);
1697 
1698     AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]);
1699     AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
1700     AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
1701     AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);
1702 
1703     AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]);
1704     AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
1705     AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
1706     AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);
1707 
1708     // Setup queue descriptor
1709     data.m_InputToInputWeights = &inputToInputWeightsTensor;
1710     data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
1711     data.m_InputToCellWeights = &inputToCellWeightsTensor;
1712     data.m_InputToOutputWeights = &inputToOutputWeightsTensor;
1713 
1714     data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
1715     data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
1716     data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
1717     data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;
1718 
1719     data.m_InputGateBias = &inputGateBiasTensor;
1720     data.m_ForgetGateBias = &forgetGateBiasTensor;
1721     data.m_CellBias = &cellBiasTensor;
1722     data.m_OutputGateBias = &outputGateBiasTensor;
1723 
1724     // Create workload and allocate tensor handles
1725     std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateQuantizedLstm(data, info);
1726     inputHandle->Allocate();
1727     outputStateInHandle->Allocate();
1728     cellStateInHandle->Allocate();
1729 
1730     cellStateOutHandle->Allocate();
1731     outputHandle->Allocate();
1732 
1733     CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
1734     CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
1735     CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);
1736 
1737     workload->Execute();
1738 
1739     CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());
1740 
1741     return ret;
1742 }
1743 
// QLSTM: CIFG, LayerNorm
//
// Builds and executes a single QLstm workload configured with CIFG
// (coupled input-forget gate) and layer normalisation enabled; peephole
// and projection are disabled, so the output tensor reuses the hidden
// state's scale/offset. All weights, biases and quantization parameters
// are fixed test data. The caller supplies the quantized input values and
// the expected output values; the function returns a LayerTestResult
// holding both the output read back from the workload and the expected
// output, for the test harness to compare.
LayerTestResult<int8_t, 2> QLstmTestImpl(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory,
        const boost::multi_array<int8_t, 2>& input,
        const boost::multi_array<int8_t, 2>& outputExpected)
{
    IgnoreUnused(memoryManager);
    // Network dimensions. No projection layer, so outputSize == numUnits.
    unsigned int numBatches = 2;
    unsigned int inputSize  = 5;
    unsigned int outputSize = 4;
    unsigned int numUnits   = 4;

    // Descriptor flags for this variant: CIFG + LayerNorm only.
    bool cifgEnabled       = true;
    bool peepholeEnabled   = false;
    bool projectionEnabled = false;
    bool layerNormEnabled  = true;

    // Scale/Offset quantization info
    float inputScale    = 0.0078125f;
    int32_t inputOffset = 0;

    int32_t hiddenStateZeroPoint = 0;
    float hiddenStateScale       = 0.007f;

    // if (!projectionEnabled) outputScale == hiddenStateScale
    float outputScale    = hiddenStateScale;
    int32_t outputOffset = hiddenStateZeroPoint;

    float cellStateScale    = 3.05176e-05f;
    int32_t cellStateOffset = 0;

    float weightsScale    = 0.00784314f;
    int32_t weightsOffset = 0;

    float layerNormScale    = 3.05182e-05f;
    int32_t layerNormOffset = 0;

    // Bias scale is derived from the layer-norm scale.
    float biasScale    = layerNormScale / 1024;
    int32_t biasOffset = 0;

    // Per-gate intermediate requantization scales.
    float inputIntermediateScale  = 0.007059f;
    float forgetIntermediateScale = 0.007812f;
    float cellIntermediateScale   = inputIntermediateScale;
    float outputIntermediateScale = forgetIntermediateScale;

    // Clip thresholds (0.0f here; presumably disables clipping - per the
    // QLstm descriptor convention, confirm against QLstmDescriptor docs).
    float cellClip       = 0.0f;
    float projectionClip = 0.0f;

    // Input/Output tensor info
    armnn::TensorInfo inputInfo({numBatches , inputSize},
                                armnn::DataType::QAsymmS8,
                                inputScale,
                                inputOffset);

    armnn::TensorInfo cellStateInfo({numBatches , numUnits},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches , outputSize},
                                      armnn::DataType::QAsymmS8,
                                      outputScale,
                                      outputOffset);

    LayerTestResult<int8_t, 2> ret(outputStateInfo);

    // Input tensors
    std::vector<int8_t> inputVector;
    inputVector.assign(input.data(), input.data() + (numBatches * inputSize));
    auto inputTensor = MakeTensor<int8_t, 2>(inputInfo, inputVector);

    // Initial cell and output state are all zeros.
    std::vector<int16_t> cellStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};
    auto cellStateInTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateInVector);

    std::vector<int8_t> outputStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};
    auto outputStateInTensor = MakeTensor<int8_t, 2>(outputStateInfo, outputStateInVector);

    // Output tensors
    // NOTE(review): cellStateOutTensor holds reference values but is never
    // compared below - only the final 'output' tensor is copied into 'ret'.
    std::vector<int16_t> cellStateOutVector  = {-11692, 9960, 5491, 8861, -9422, 7726, 2056, 13149};
    auto cellStateOutTensor  = MakeTensor<int16_t, 2>(cellStateInfo, cellStateOutVector);

    std::vector<int8_t> outputVector;
    outputVector.assign(outputExpected.data(), outputExpected.data() + (numBatches * outputSize));
    ret.outputExpected = MakeTensor<int8_t, 2>(outputStateInfo, outputVector);

    // Create tensor handles
    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
            tensorHandleFactory.CreateTensorHandle(cellStateInfo);
    std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
            tensorHandleFactory.CreateTensorHandle(outputStateInfo);

    std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle =
            tensorHandleFactory.CreateTensorHandle(outputStateInfo);
    std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
            tensorHandleFactory.CreateTensorHandle(cellStateInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputStateInfo);

    armnn::QLstmQueueDescriptor data;
    armnn::WorkloadInfo info;

    // Add inputs and outputs to workload
    // Input order: input, outputStateIn, cellStateIn.
    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddInputToWorkload(data, info, outputStateInfo, outputStateInHandle.get());
    AddInputToWorkload(data, info, cellStateInfo, cellStateInHandle.get());

    // Output order: outputStateOut, cellStateOut, output.
    AddOutputToWorkload(data, info, outputStateInfo, outputStateOutHandle.get());
    AddOutputToWorkload(data, info, cellStateInfo, cellStateOutHandle.get());
    AddOutputToWorkload(data, info, outputStateInfo, outputHandle.get());

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({outputSize, inputSize},
                                       armnn::DataType::QSymmS8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({outputSize, outputSize},
                                           armnn::DataType::QSymmS8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({outputSize}, armnn::DataType::Signed32, biasScale, biasOffset);

    armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);

    // Weights and bias tensor data
    // With CIFG enabled the input-gate tensors (inputToInputWeights,
    // recurrentToInputWeights, inputGateBias, inputLayerNormWeights) are
    // intentionally not provided.
    auto inputToForgetWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {-77, -13, 38, 25, 115, -64, -25, -51, 38, -102, -51, 38, -64, -51, -77, 38, -51, -77, -64, -64});
    auto inputToCellWeights   = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {-51, -38, -25, -13, -64, 64, -25, -38, -25, -77, 77, -13, -51, -38, -89, 89, -115, -64, 102, 77});
    auto inputToOutputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {-102, -51, -25, -115, -13, -89, 38, -38, -102, -25, 77, -25, 51, -89, -38, -64, 13, 64, -77, -51});

    auto recurrentToForgetWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {-64, -38, -64, -25, 77, 51, 115, 38, -13, 25, 64, 25, 25, 38, -13, 51});
    auto recurrentToCellWeights   = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {-38, 25, 13, -38, 102, -10, -25, 38, 102, -77, -13, 25, 38, -13, 25, 64});
    auto recurrentToOutputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {38, -13, 13, -25, -64, -89, -25, -77, -13, -51, -89, -25, 13, 64, 25, -38});

    auto forgetGateBias = MakeTensor<int32_t, 1>(biasInfo, {2147484, -6442451, -4294968, 2147484});
    auto cellBias       = MakeTensor<int32_t, 1>(biasInfo, {-1073742, 15461883, 5368709, 1717987});
    auto outputGateBias = MakeTensor<int32_t, 1>(biasInfo, {1073742, -214748, 4294968, 2147484});

    auto forgetLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {6553, 6553, 13107, 9830});
    auto cellLayerNormWeights   = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {22937, 6553, 9830, 26214});
    auto outputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {19660, 6553, 6553, 16384});

    // ScopedCpuTensorHandles
    armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(inputWeightsInfo);
    armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(inputWeightsInfo);
    armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(inputWeightsInfo);

    armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(recurrentWeightsInfo);
    armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(recurrentWeightsInfo);
    armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(recurrentWeightsInfo);

    armnn::ScopedCpuTensorHandle forgetGateBiasTensor(biasInfo);
    armnn::ScopedCpuTensorHandle cellBiasTensor(biasInfo);
    armnn::ScopedCpuTensorHandle outputGateBiasTensor(biasInfo);

    armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(layerNormWeightsInfo);
    armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(layerNormWeightsInfo);
    armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(layerNormWeightsInfo);

    // Allocate and copy data
    AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);

    AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);

    AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
    AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
    AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);

    AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]);

    // Setup queue descriptor
    data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
    data.m_InputToCellWeights = &inputToCellWeightsTensor;
    data.m_InputToOutputWeights = &inputToOutputWeightsTensor;

    data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
    data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
    data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;

    data.m_ForgetGateBias = &forgetGateBiasTensor;
    data.m_CellBias = &cellBiasTensor;
    data.m_OutputGateBias = &outputGateBiasTensor;

    data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
    data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
    data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;

    data.m_Parameters.m_CifgEnabled = cifgEnabled;
    data.m_Parameters.m_PeepholeEnabled = peepholeEnabled;
    data.m_Parameters.m_ProjectionEnabled = projectionEnabled;
    data.m_Parameters.m_LayerNormEnabled = layerNormEnabled;

    data.m_Parameters.m_InputIntermediateScale = inputIntermediateScale;
    data.m_Parameters.m_ForgetIntermediateScale = forgetIntermediateScale;
    data.m_Parameters.m_CellIntermediateScale = cellIntermediateScale;
    data.m_Parameters.m_OutputIntermediateScale = outputIntermediateScale;

    data.m_Parameters.m_HiddenStateZeroPoint = hiddenStateZeroPoint;
    data.m_Parameters.m_HiddenStateScale = hiddenStateScale;

    data.m_Parameters.m_CellClip = cellClip;
    data.m_Parameters.m_ProjectionClip = projectionClip;

    // Create workload and allocate tensor handles
    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateQLstm(data, info);
    inputHandle->Allocate();
    outputStateInHandle->Allocate();
    cellStateInHandle->Allocate();

    outputStateOutHandle->Allocate();
    cellStateOutHandle->Allocate();
    outputHandle->Allocate();

    // Copy quantized input and zeroed initial state into the input handles.
    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
    CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
    CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);

    workload->Execute();

    // Only the final output tensor is read back for comparison; the state
    // outputs are left unchecked here.
    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());

    return ret;
}
1981 
// QLSTM: Projection, LayerNorm
//
// Builds and executes a single QLstm workload with the projection layer
// and layer normalisation enabled; CIFG and peephole are disabled, so the
// full set of input-gate tensors is supplied alongside the projection
// weights. Weights, biases and quantization parameters are fixed test
// data. The caller provides the quantized input values and the expected
// output values; the function returns a LayerTestResult holding both the
// output read back from the workload and the expected output.
LayerTestResult<int8_t, 2> QLstmTestImpl1(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory,
        const boost::multi_array<int8_t, 2>& input,
        const boost::multi_array<int8_t, 2>& outputExpected)
{
    IgnoreUnused(memoryManager);
    // Network dimensions. With projection enabled, outputSize (3) differs
    // from numUnits (4): the projection maps numUnits -> outputSize.
    unsigned int numBatches = 2;
    unsigned int inputSize  = 5;
    unsigned int outputSize = 3;
    unsigned int numUnits   = 4;

    // Descriptor flags for this variant: Projection + LayerNorm only.
    bool cifgEnabled       = false;
    bool peepholeEnabled   = false;
    bool projectionEnabled = true;
    bool layerNormEnabled  = true;

    // Scale/Offset quantization info
    float inputScale    = 0.0078125f;
    int32_t inputOffset = 0;

    int32_t hiddenStateZeroPoint = 0;
    float hiddenStateScale       = 0.007f;

    // Projection is enabled here, so the output quantization is set
    // independently of the hidden state (it matches the cell state scale).
    float outputScale    = 3.05176e-05f;
    int32_t outputOffset = 0;

    float cellStateScale    = 3.05176e-05f;
    int32_t cellStateOffset = 0;

    float weightsScale    = 0.00784314f;
    int32_t weightsOffset = 0;

    float layerNormScale    = 3.05182e-05f;
    int32_t layerNormOffset = 0;

    // Bias scale is derived from the layer-norm scale.
    float biasScale    = layerNormScale / 1024;
    int32_t biasOffset = 0;

    float projectionWeightsScale = 0.00392157f;

    // Per-gate intermediate requantization scales.
    float inputIntermediateScale  = 0.007059f;
    float forgetIntermediateScale = 0.007812f;
    float cellIntermediateScale   = inputIntermediateScale;
    float outputIntermediateScale = forgetIntermediateScale;

    // Clip thresholds (0.0f here; presumably disables clipping - per the
    // QLstm descriptor convention, confirm against QLstmDescriptor docs).
    float cellClip       = 0.0f;
    float projectionClip = 0.0f;

    // Input/Output tensor info
    armnn::TensorInfo inputInfo({numBatches , inputSize},
                                armnn::DataType::QAsymmS8,
                                inputScale,
                                inputOffset);

    armnn::TensorInfo cellStateInfo({numBatches , numUnits},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches , outputSize},
                                      armnn::DataType::QAsymmS8,
                                      outputScale,
                                      outputOffset);

    LayerTestResult<int8_t, 2> ret(outputStateInfo);

    // Input tensors
    std::vector<int8_t> inputVector;
    inputVector.assign(input.data(), input.data() + (numBatches * inputSize));
    auto inputTensor = MakeTensor<int8_t, 2>(inputInfo, inputVector);

    // Initial cell state: numBatches * numUnits zeros.
    std::vector<int16_t> cellStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};
    auto cellStateInTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateInVector);

    // Initial output state: numBatches * outputSize zeros.
    std::vector<int8_t> outputStateInVector = {0, 0, 0, 0, 0, 0};
    auto outputStateInTensor = MakeTensor<int8_t, 2>(outputStateInfo, outputStateInVector);

    // Output tensors
    // NOTE(review): cellStateOutTensor holds reference values but is never
    // compared below - only the final 'output' tensor is copied into 'ret'.
    std::vector<int16_t> cellStateOutVector  = {-14650, 8939, 5771, 6715, -11843, 7847, 1508, 12939};
    auto cellStateOutTensor  = MakeTensor<int16_t, 2>(cellStateInfo, cellStateOutVector);

    std::vector<int8_t> outputVector;
    outputVector.assign(outputExpected.data(), outputExpected.data() + (numBatches * outputSize));
    ret.outputExpected = MakeTensor<int8_t, 2>(outputStateInfo, outputVector);

    // Create tensor handles
    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
            tensorHandleFactory.CreateTensorHandle(cellStateInfo);
    std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
            tensorHandleFactory.CreateTensorHandle(outputStateInfo);

    std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle =
            tensorHandleFactory.CreateTensorHandle(outputStateInfo);
    std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
            tensorHandleFactory.CreateTensorHandle(cellStateInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputStateInfo);

    armnn::QLstmQueueDescriptor data;
    armnn::WorkloadInfo info;

    // Add inputs and outputs to workload
    // Input order: input, outputStateIn, cellStateIn.
    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddInputToWorkload(data, info, outputStateInfo, outputStateInHandle.get());
    AddInputToWorkload(data, info, cellStateInfo, cellStateInHandle.get());

    // Output order: outputStateOut, cellStateOut, output.
    AddOutputToWorkload(data, info, outputStateInfo, outputStateOutHandle.get());
    AddOutputToWorkload(data, info, cellStateInfo, cellStateOutHandle.get());
    AddOutputToWorkload(data, info, outputStateInfo, outputHandle.get());

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({numUnits, inputSize},
                                       armnn::DataType::QSymmS8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({numUnits, outputSize},
                                           armnn::DataType::QSymmS8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({numUnits}, armnn::DataType::Signed32, biasScale, biasOffset);

    armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);

    // Projection maps {numUnits} hidden units down to {outputSize}.
    armnn::TensorInfo projectionWeightsInfo({outputSize, numUnits},
                                            armnn::DataType::QSymmS8,
                                            projectionWeightsScale,
                                            0);

    // Weights and bias tensor data
    // CIFG is disabled, so the full input-gate tensor set is supplied.
    auto inputToInputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {64, 77, 89, -102, -115, 13, 25, 38, -51, 64, -102, 89, -77, 64, -51, -64, -51, -38, -25, -13});
    auto inputToForgetWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {-77, -13, 38, 25, 115, -64, -25, -51, 38, -102, -51, 38, -64, -51, -77, 38, -51, -77, -64, -64});
    auto inputToCellWeights   = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {-51, -38, -25, -13, -64, 64, -25, -38, -25, -77, 77, -13, -51, -38, -89, 89, -115, -64, 102, 77});
    auto inputToOutputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {-102, -51, -25, -115, -13, -89, 38, -38, -102, -25, 77, -25, 51, -89, -38, -64, 13, 64, -77, -51});

    auto recurrentToInputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {-25, -38, 51, 13, -64, 115, -25, -38, -89, 6, -25, -77});
    auto recurrentToForgetWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {-64, -38, -64, -25, 77, 51, 115, 38, -13, 25, 64, 25});
    auto recurrentToCellWeights   = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {-38, 25, 13, -38, 102, -10, -25, 38, 102, -77, -13, 25});
    auto recurrentToOutputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {38, -13, 13, -25, -64, -89, -25, -77, -13, -51, -89, -25});

    auto inputGateBias  = MakeTensor<int32_t, 1>(biasInfo, {644245, 3221226, 4724464, 8160438});
    auto forgetGateBias = MakeTensor<int32_t, 1>(biasInfo, {2147484, -6442451, -4294968, 2147484});
    auto cellBias       = MakeTensor<int32_t, 1>(biasInfo, {-1073742, 15461883, 5368709, 1717987});
    auto outputGateBias = MakeTensor<int32_t, 1>(biasInfo, {1073742, -214748, 4294968, 2147484});

    auto inputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {3277, 6553, 9830, 16384});
    auto forgetLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {6553, 6553, 13107, 9830});
    auto cellLayerNormWeights   = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {22937, 6553, 9830, 26214});
    auto outputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {19660, 6553, 6553, 16384});

    auto projectionWeights = MakeTensor<int8_t, 2>(projectionWeightsInfo,
            {-25, 51, 3, -51, 25, 127, 77, 20, 18, 51, -102, 51});

    // ScopedCpuTensorHandles
    armnn::ScopedCpuTensorHandle inputToInputWeightsTensor(inputWeightsInfo);
    armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(inputWeightsInfo);
    armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(inputWeightsInfo);
    armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(inputWeightsInfo);

    armnn::ScopedCpuTensorHandle recurrentToInputWeightsTensor(recurrentWeightsInfo);
    armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(recurrentWeightsInfo);
    armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(recurrentWeightsInfo);
    armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(recurrentWeightsInfo);

    armnn::ScopedCpuTensorHandle inputGateBiasTensor(biasInfo);
    armnn::ScopedCpuTensorHandle forgetGateBiasTensor(biasInfo);
    armnn::ScopedCpuTensorHandle cellBiasTensor(biasInfo);
    armnn::ScopedCpuTensorHandle outputGateBiasTensor(biasInfo);

    armnn::ScopedCpuTensorHandle inputLayerNormWeightsTensor(layerNormWeightsInfo);
    armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(layerNormWeightsInfo);
    armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(layerNormWeightsInfo);
    armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(layerNormWeightsInfo);

    armnn::ScopedCpuTensorHandle projectionWeightsTensor(projectionWeightsInfo);

    // Allocate and copy data
    AllocateAndCopyDataToITensorHandle(&inputToInputWeightsTensor, &inputToInputWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);

    AllocateAndCopyDataToITensorHandle(&recurrentToInputWeightsTensor, &recurrentToInputWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);

    AllocateAndCopyDataToITensorHandle(&inputGateBiasTensor, &inputGateBias[0]);
    AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
    AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
    AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);

    AllocateAndCopyDataToITensorHandle(&inputLayerNormWeightsTensor, &inputLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]);

    AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]);

    // Setup queue descriptor
    data.m_InputToInputWeights = &inputToInputWeightsTensor;
    data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
    data.m_InputToCellWeights = &inputToCellWeightsTensor;
    data.m_InputToOutputWeights = &inputToOutputWeightsTensor;

    data.m_RecurrentToInputWeights = &recurrentToInputWeightsTensor;
    data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
    data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
    data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;

    data.m_InputGateBias = &inputGateBiasTensor;
    data.m_ForgetGateBias = &forgetGateBiasTensor;
    data.m_CellBias = &cellBiasTensor;
    data.m_OutputGateBias = &outputGateBiasTensor;

    data.m_InputLayerNormWeights = &inputLayerNormWeightsTensor;
    data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
    data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
    data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;

    data.m_ProjectionWeights = &projectionWeightsTensor;

    data.m_Parameters.m_CifgEnabled = cifgEnabled;
    data.m_Parameters.m_PeepholeEnabled = peepholeEnabled;
    data.m_Parameters.m_ProjectionEnabled = projectionEnabled;
    data.m_Parameters.m_LayerNormEnabled = layerNormEnabled;

    data.m_Parameters.m_InputIntermediateScale = inputIntermediateScale;
    data.m_Parameters.m_ForgetIntermediateScale = forgetIntermediateScale;
    data.m_Parameters.m_CellIntermediateScale = cellIntermediateScale;
    data.m_Parameters.m_OutputIntermediateScale = outputIntermediateScale;

    data.m_Parameters.m_HiddenStateZeroPoint = hiddenStateZeroPoint;
    data.m_Parameters.m_HiddenStateScale = hiddenStateScale;

    data.m_Parameters.m_CellClip = cellClip;
    data.m_Parameters.m_ProjectionClip = projectionClip;

    // Create workload and allocate tensor handles
    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateQLstm(data, info);
    inputHandle->Allocate();
    outputStateInHandle->Allocate();
    cellStateInHandle->Allocate();

    outputStateOutHandle->Allocate();
    cellStateOutHandle->Allocate();
    outputHandle->Allocate();

    // Copy quantized input and zeroed initial state into the input handles.
    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
    CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
    CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);

    workload->Execute();

    // Only the final output tensor is read back for comparison; the state
    // outputs are left unchecked here.
    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());

    return ret;
}
2253 
// QLSTM: Projection, CIFG, LayerNorm
//
// Runs a single QLSTM workload step with CIFG, projection and layer
// normalisation enabled (peephole disabled) and returns the int8 output
// together with the caller-supplied expected output for comparison.
// 'input' is [numBatches, inputSize] QAsymmS8 data; 'outputExpected' is
// [numBatches, outputSize] QAsymmS8 data.
LayerTestResult<int8_t, 2> QLstmTestImpl2(
        armnn::IWorkloadFactory& workloadFactory,
        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
        const armnn::ITensorHandleFactory& tensorHandleFactory,
        const boost::multi_array<int8_t, 2>& input,
        const boost::multi_array<int8_t, 2>& outputExpected)
{
    IgnoreUnused(memoryManager);
    // Network dimensions for this test case.
    unsigned int numBatches = 2;
    unsigned int inputSize  = 5;
    unsigned int outputSize = 3;
    unsigned int numUnits   = 4;

    // Optional QLSTM features exercised by this test.
    bool cifgEnabled       = true;
    bool peepholeEnabled   = false;
    bool projectionEnabled = true;
    bool layerNormEnabled  = true;

    // Scale/Offset quantization info
    float inputScale    = 0.0078125f;
    int32_t inputOffset = 0;

    int32_t hiddenStateZeroPoint = 0;
    float hiddenStateScale       = 0.007f;

    // if (!projectionEnabled) outputScale == hiddenStateScale
    float outputScale    = 3.05176e-05f;
    int32_t outputOffset = 0;

    float cellStateScale    = 3.05176e-05f;
    int32_t cellStateOffset = 0;

    float weightsScale    = 0.00784314f;
    int32_t weightsOffset = 0;

    float layerNormScale    = 3.05182e-05f;
    int32_t layerNormOffset = 0;

    // Bias scale is derived from the layer-norm scale (integer division by 1024).
    float biasScale    = layerNormScale / 1024;
    int32_t biasOffset = 0;

    float projectionWeightsScale = 0.00392157f;

    // Intermediate (gate) tensor scales; cell/output reuse input/forget values.
    float inputIntermediateScale  = 0.007059f;
    float forgetIntermediateScale = 0.007812f;
    float cellIntermediateScale   = inputIntermediateScale;
    float outputIntermediateScale = forgetIntermediateScale;

    // 0.0f means clipping disabled.
    float cellClip       = 0.0f;
    float projectionClip = 0.0f;

    // Input/Output tensor info
    armnn::TensorInfo inputInfo({numBatches , inputSize},
                                armnn::DataType::QAsymmS8,
                                inputScale,
                                inputOffset);

    armnn::TensorInfo cellStateInfo({numBatches , numUnits},
                                    armnn::DataType::QSymmS16,
                                    cellStateScale,
                                    cellStateOffset);

    armnn::TensorInfo outputStateInfo({numBatches , outputSize},
                                      armnn::DataType::QAsymmS8,
                                      outputScale,
                                      outputOffset);

    LayerTestResult<int8_t, 2> ret(outputStateInfo);

    // Input tensors
    std::vector<int8_t> inputVector;
    inputVector.assign(input.data(), input.data() + (numBatches * inputSize));
    auto inputTensor = MakeTensor<int8_t, 2>(inputInfo, inputVector);

    // Initial cell/output state is all zeros.
    std::vector<int16_t> cellStateInVector = {0, 0, 0, 0, 0, 0, 0, 0};
    auto cellStateInTensor = MakeTensor<int16_t, 2>(cellStateInfo, cellStateInVector);

    std::vector<int8_t> outputStateInVector = {0, 0, 0, 0, 0, 0};
    auto outputStateInTensor = MakeTensor<int8_t, 2>(outputStateInfo, outputStateInVector);

    // Output tensors
    // NOTE(review): cellStateOutTensor is constructed but never compared against
    // below — only the final output tensor is copied into 'ret' and checked.
    std::vector<int16_t> cellStateOutVector  = {-14650, 8939, 5771, 6715, -11843, 7847, 1508, 12939};
    auto cellStateOutTensor  = MakeTensor<int16_t, 2>(cellStateInfo, cellStateOutVector);

    std::vector<int8_t> outputVector;
    outputVector.assign(outputExpected.data(), outputExpected.data() + (numBatches * outputSize));
    ret.outputExpected = MakeTensor<int8_t, 2>(outputStateInfo, outputVector);

    // Create tensor handles
    std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputInfo);
    std::unique_ptr<armnn::ITensorHandle> cellStateInHandle =
            tensorHandleFactory.CreateTensorHandle(cellStateInfo);
    std::unique_ptr<armnn::ITensorHandle> outputStateInHandle =
            tensorHandleFactory.CreateTensorHandle(outputStateInfo);

    std::unique_ptr<armnn::ITensorHandle> outputStateOutHandle =
            tensorHandleFactory.CreateTensorHandle(outputStateInfo);
    std::unique_ptr<armnn::ITensorHandle> cellStateOutHandle =
            tensorHandleFactory.CreateTensorHandle(cellStateInfo);
    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputStateInfo);

    armnn::QLstmQueueDescriptor data;
    armnn::WorkloadInfo info;

    // Add inputs and outputs to workload
    AddInputToWorkload(data, info, inputInfo, inputHandle.get());
    AddInputToWorkload(data, info, outputStateInfo, outputStateInHandle.get());
    AddInputToWorkload(data, info, cellStateInfo, cellStateInHandle.get());

    AddOutputToWorkload(data, info, outputStateInfo, outputStateOutHandle.get());
    AddOutputToWorkload(data, info, cellStateInfo, cellStateOutHandle.get());
    AddOutputToWorkload(data, info, outputStateInfo, outputHandle.get());

    // Weights and bias tensor and quantization info
    armnn::TensorInfo inputWeightsInfo({numUnits, inputSize},
                                       armnn::DataType::QSymmS8,
                                       weightsScale,
                                       weightsOffset);

    armnn::TensorInfo recurrentWeightsInfo({numUnits, outputSize},
                                           armnn::DataType::QSymmS8,
                                           weightsScale,
                                           weightsOffset);

    armnn::TensorInfo biasInfo({numUnits}, armnn::DataType::Signed32, biasScale, biasOffset);

    armnn::TensorInfo layerNormWeightsInfo({numUnits}, armnn::DataType::QSymmS16, layerNormScale, layerNormOffset);

    armnn::TensorInfo projectionWeightsInfo({outputSize, numUnits},
                                            armnn::DataType::QSymmS8,
                                            projectionWeightsScale,
                                            0);

    // Weights and bias tensor data
    // CIFG is enabled, so no input-gate weights/bias are provided.
    auto inputToForgetWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {-77, -13, 38, 25, 115, -64, -25, -51, 38, -102, -51, 38, -64, -51, -77, 38, -51, -77, -64, -64});
    auto inputToCellWeights   = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {-51, -38, -25, -13, -64, 64, -25, -38, -25, -77, 77, -13, -51, -38, -89, 89, -115, -64, 102, 77});
    auto inputToOutputWeights = MakeTensor<int8_t, 2>(inputWeightsInfo,
            {-102, -51, -25, -115, -13, -89, 38, -38, -102, -25, 77, -25, 51, -89, -38, -64, 13, 64, -77, -51});

    auto recurrentToForgetWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {-64, -38, -64, -25, 77, 51, 115, 38, -13, 25, 64, 25});
    auto recurrentToCellWeights   = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {-38, 25, 13, -38, 102, -10, -25, 38, 102, -77, -13, 25});
    auto recurrentToOutputWeights = MakeTensor<int8_t, 2>(recurrentWeightsInfo,
            {38, -13, 13, -25, -64, -89, -25, -77, -13, -51, -89, -25});

    auto forgetGateBias = MakeTensor<int32_t, 1>(biasInfo, {2147484, -6442451, -4294968, 2147484});
    auto cellBias       = MakeTensor<int32_t, 1>(biasInfo, {-1073742, 15461883, 5368709, 1717987});
    auto outputGateBias = MakeTensor<int32_t, 1>(biasInfo, {1073742, -214748, 4294968, 2147484});

    auto forgetLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {6553, 6553, 13107, 9830});
    auto cellLayerNormWeights   = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {22937, 6553, 9830, 26214});
    auto outputLayerNormWeights = MakeTensor<int16_t, 1>(layerNormWeightsInfo, {19660, 6553, 6553, 16384});

    auto projectionWeights = MakeTensor<int8_t, 2>(projectionWeightsInfo,
            {-25, 51, 3, -51, 25, 127, 77, 20, 18, 51, -102, 51});

    // ScopedCpuTensorHandles
    armnn::ScopedCpuTensorHandle inputToForgetWeightsTensor(inputWeightsInfo);
    armnn::ScopedCpuTensorHandle inputToCellWeightsTensor(inputWeightsInfo);
    armnn::ScopedCpuTensorHandle inputToOutputWeightsTensor(inputWeightsInfo);

    armnn::ScopedCpuTensorHandle recurrentToForgetWeightsTensor(recurrentWeightsInfo);
    armnn::ScopedCpuTensorHandle recurrentToCellWeightsTensor(recurrentWeightsInfo);
    armnn::ScopedCpuTensorHandle recurrentToOutputWeightsTensor(recurrentWeightsInfo);

    armnn::ScopedCpuTensorHandle forgetGateBiasTensor(biasInfo);
    armnn::ScopedCpuTensorHandle cellBiasTensor(biasInfo);
    armnn::ScopedCpuTensorHandle outputGateBiasTensor(biasInfo);

    armnn::ScopedCpuTensorHandle forgetLayerNormWeightsTensor(layerNormWeightsInfo);
    armnn::ScopedCpuTensorHandle cellLayerNormWeightsTensor(layerNormWeightsInfo);
    armnn::ScopedCpuTensorHandle outputLayerNormWeightsTensor(layerNormWeightsInfo);

    armnn::ScopedCpuTensorHandle projectionWeightsTensor(projectionWeightsInfo);

    // Allocate and copy data
    AllocateAndCopyDataToITensorHandle(&inputToForgetWeightsTensor, &inputToForgetWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToCellWeightsTensor, &inputToCellWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&inputToOutputWeightsTensor, &inputToOutputWeights[0][0]);

    AllocateAndCopyDataToITensorHandle(&recurrentToForgetWeightsTensor, &recurrentToForgetWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToCellWeightsTensor, &recurrentToCellWeights[0][0]);
    AllocateAndCopyDataToITensorHandle(&recurrentToOutputWeightsTensor, &recurrentToOutputWeights[0][0]);

    AllocateAndCopyDataToITensorHandle(&forgetGateBiasTensor, &forgetGateBias[0]);
    AllocateAndCopyDataToITensorHandle(&cellBiasTensor, &cellBias[0]);
    AllocateAndCopyDataToITensorHandle(&outputGateBiasTensor, &outputGateBias[0]);

    AllocateAndCopyDataToITensorHandle(&forgetLayerNormWeightsTensor, &forgetLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&cellLayerNormWeightsTensor, &cellLayerNormWeights[0]);
    AllocateAndCopyDataToITensorHandle(&outputLayerNormWeightsTensor, &outputLayerNormWeights[0]);

    AllocateAndCopyDataToITensorHandle(&projectionWeightsTensor, &projectionWeights[0][0]);

    // Setup queue descriptor
    data.m_InputToForgetWeights = &inputToForgetWeightsTensor;
    data.m_InputToCellWeights = &inputToCellWeightsTensor;
    data.m_InputToOutputWeights = &inputToOutputWeightsTensor;

    data.m_RecurrentToForgetWeights = &recurrentToForgetWeightsTensor;
    data.m_RecurrentToCellWeights = &recurrentToCellWeightsTensor;
    data.m_RecurrentToOutputWeights = &recurrentToOutputWeightsTensor;

    data.m_ForgetGateBias = &forgetGateBiasTensor;
    data.m_CellBias = &cellBiasTensor;
    data.m_OutputGateBias = &outputGateBiasTensor;

    data.m_ForgetLayerNormWeights = &forgetLayerNormWeightsTensor;
    data.m_CellLayerNormWeights = &cellLayerNormWeightsTensor;
    data.m_OutputLayerNormWeights = &outputLayerNormWeightsTensor;

    data.m_ProjectionWeights = &projectionWeightsTensor;

    data.m_Parameters.m_CifgEnabled = cifgEnabled;
    data.m_Parameters.m_PeepholeEnabled = peepholeEnabled;
    data.m_Parameters.m_ProjectionEnabled = projectionEnabled;
    data.m_Parameters.m_LayerNormEnabled = layerNormEnabled;

    data.m_Parameters.m_InputIntermediateScale = inputIntermediateScale;
    data.m_Parameters.m_ForgetIntermediateScale = forgetIntermediateScale;
    data.m_Parameters.m_CellIntermediateScale = cellIntermediateScale;
    data.m_Parameters.m_OutputIntermediateScale = outputIntermediateScale;

    data.m_Parameters.m_HiddenStateZeroPoint = hiddenStateZeroPoint;
    data.m_Parameters.m_HiddenStateScale = hiddenStateScale;

    data.m_Parameters.m_CellClip = cellClip;
    data.m_Parameters.m_ProjectionClip = projectionClip;

    // Create workload and allocate tensor handles
    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateQLstm(data, info);
    inputHandle->Allocate();
    outputStateInHandle->Allocate();
    cellStateInHandle->Allocate();

    outputStateOutHandle->Allocate();
    cellStateOutHandle->Allocate();
    outputHandle->Allocate();

    // Upload the step inputs, run one QLSTM step, then read back the output.
    CopyDataToITensorHandle(inputHandle.get(), &inputTensor[0][0]);
    CopyDataToITensorHandle(outputStateInHandle.get(), &outputStateInTensor[0][0]);
    CopyDataToITensorHandle(cellStateInHandle.get(), &cellStateInTensor[0][0]);

    workload->Execute();

    CopyDataFromITensorHandle(&ret.output[0][0], outputHandle.get());

    return ret;
}
2507 
2508 
2509 } // anonymous namespace
2510 
2511 #if defined(ARMNNREF_ENABLED)
2512 
2513 // The LSTM test units are run only for the reference backend at the moment
2514 
LstmUtilsZeroVectorTest()2515 void LstmUtilsZeroVectorTest()
2516 {
2517     armnn::TensorInfo inputDesc({4}, armnn::DataType::Float32);
2518     boost::multi_array<float, 1> input = MakeTensor<float, 1>(inputDesc, std::vector<float>(
2519             {2., 3., 3., 4.}));
2520 
2521     boost::multi_array<float, 1> expectedOutput = MakeTensor<float, 1>(inputDesc, std::vector<float>(
2522             {0., 0., 0., 0.}));
2523 
2524     return LstmUtilsZeroVectorTestImpl<armnn::DataType::Float32>(input, 4, expectedOutput);
2525 }
2526 
LstmUtilsMeanStddevNormalizationNoneZeroInputTest()2527 void LstmUtilsMeanStddevNormalizationNoneZeroInputTest()
2528 {
2529     uint32_t batchSize = 2;
2530     uint32_t vecSize = 4;
2531     armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
2532     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2533             { 0.1f, 0.2f, 0.3f, 0.4f,      //batch 0
2534               0.9f, 1.0f, 1.1f, 1.2f }));  //batch 1
2535 
2536     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2537             { -1.34164071f, -0.447213531f, 0.44721365f,  1.34164071f,      //batch 0
2538               -1.34163153f, -0.447210163f, 0.447211236f, 1.3416326f  }));  //batch 1
2539 
2540     return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
2541             vecSize, batchSize, expectedOutput);
2542 }
2543 
LstmUtilsMeanStddevNormalizationAllZeroInputTest()2544 void LstmUtilsMeanStddevNormalizationAllZeroInputTest()
2545 {
2546     uint32_t batchSize = 2;
2547     uint32_t vecSize = 4;
2548     armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
2549     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2550             { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
2551               0.0f, 0.0f, 0.0f, 0.0f }));  //batch 1
2552 
2553     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2554             { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
2555               0.0f, 0.0f, 0.0f, 0.0f }));  //batch 1
2556 
2557     return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
2558             vecSize, batchSize, expectedOutput);
2559 }
2560 
LstmUtilsMeanStddevNormalizationMixedZeroInputTest()2561 void LstmUtilsMeanStddevNormalizationMixedZeroInputTest()
2562 {
2563     uint32_t batchSize = 2;
2564     uint32_t vecSize = 4;
2565     armnn::TensorInfo inputDesc({batchSize, vecSize}, armnn::DataType::Float32);
2566     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2567             { 0.0f, 0.0f, 0.0f, 0.0f,      //batch 0
2568               0.1f, 0.2f, 0.3f, 0.4f }));  //batch 1
2569 
2570     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2571             {         0.0f,          0.0f,        0.0f,        0.0f,      //batch 0
2572               -1.34164071f, -0.447213531f, 0.44721365f, 1.34164071f }));  //batch 1
2573 
2574     return LstmUtilsMeanStddevNormalizationTestImpl<armnn::DataType::Float32>(input,
2575             vecSize, batchSize, expectedOutput);
2576 }
2577 
// Verifies VectorBatchVectorCwiseProduct: a length-29 vector is multiplied
// element-wise into each of 4 batch rows, with sign patterns varied per batch
// and a trailing zero element to exercise the zero case.
void LstmUtilsVectorBatchVectorCwiseProductTest()
{
    uint32_t batchSize = 4;
    uint32_t vecSize = 29;
    armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
    boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
            {   1.1f,   2.2f,   3.3f,   4.4f,   5.5f,   6.6f,   7.7f,   8.8f,   9.9f, 10.1f,
              11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f, 20.2f,
              21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f,     0.0f}));

    armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
    boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
            { /* batch 0 */
                1.1f,   2.2f,   3.3f,   4.4f,   5.5f,   6.6f,   7.7f,   8.8f,   9.9f,  10.1f,
              11.11f, 12.12f, 13.13f, 14.14f, 15.15f, 16.16f, 17.17f, 18.18f, 19.19f,  20.2f,
              21.21f, 22.22f, 23.23f, 24.24f, 25.25f, 26.26f, 27.27f, 28.28f,   0.0f,
              /* batch 1 */
                -1.1f,   -2.2f,   -3.3f,   -4.4f,   -5.5f,   -6.6f,   -7.7f,   -8.8f,   -9.9f, -10.1f,
              -11.11f, -12.12f, -13.13f, -14.14f, -15.15f, -16.16f, -17.17f, -18.18f, -19.19f, -20.2f,
              -21.21f, -22.22f, -23.23f, -24.24f, -25.25f, -26.26f, -27.27f, -28.28f,    0.0f,
              /* batch 2 */
                1.1f,   -2.2f,   3.3f,   -4.4f,   5.5f,   -6.6f,   7.7f,   -8.8f,   9.9f, -10.1f,
              11.11f, -12.12f, 13.13f, -14.14f, 15.15f, -16.16f, 17.17f, -18.18f, 19.19f, -20.2f,
              21.21f, -22.22f, 23.23f, -24.24f, 25.25f, -26.26f, 27.27f, -28.28f,   0.0f,
              /* batch 3 */
                -1.1f,   2.2f,   -3.3f,   4.4f,   -5.5f,   6.6f,   -7.7f,   8.8f,   -9.9f, 10.1f,
              -11.11f, 12.12f, -13.13f, 14.14f, -15.15f, 16.16f, -17.17f, 18.18f, -19.19f, 20.2f,
              -21.21f, 22.22f, -23.23f, 24.24f, -25.25f, 26.26f, -27.27f, 28.28f,    0.0f}));

    // Expected output: batchVector[b][i] = vector[i] * batchVector[b][i]
    // (plain element-wise product per batch; e.g. 1.1 * 1.1 = 1.21).
    boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
            { /* batch 0 */
                 1.210000f,    4.840000f,   10.889999f,   19.360001f,   30.250000f,   43.559998f,
                59.289997f,   77.440002f,   98.009995f,  102.010010f,  123.432091f,  146.894394f,
               172.396896f,  199.939606f,  229.522491f,  261.145599f,  294.808899f,  330.512421f,
               368.256134f,  408.040039f,  449.864075f,  493.728363f,  539.632874f,  587.577576f,
               637.562500f,  689.587585f,  743.652954f,  799.758423f,    0.000000f,
              /* batch 1 */
                -1.210000f,   -4.840000f,  -10.889999f,  -19.360001f,  -30.250000f,  -43.559998f,
               -59.289997f,  -77.440002f,  -98.009995f, -102.010010f, -123.432091f, -146.894394f,
              -172.396896f, -199.939606f, -229.522491f, -261.145599f, -294.808899f, -330.512421f,
              -368.256134f, -408.040039f, -449.864075f, -493.728363f, -539.632874f, -587.577576f,
              -637.562500f, -689.587585f, -743.652954f, -799.758423f,    0.000000f,
              /* batch 2 */
                 1.210000f,   -4.840000f,  10.889999f,   -19.360001f,   30.250000f,  -43.559998f,
                59.289997f,  -77.440002f,  98.009995f,  -102.010010f,  123.432091f, -146.894394f,
               172.396896f, -199.939606f, 229.522491f,  -261.145599f,  294.808899f, -330.512421f,
               368.256134f, -408.040039f, 449.864075f,  -493.728363f,  539.632874f, -587.577576f,
               637.562500f, -689.587585f, 743.652954f,  -799.758423f,    0.000000f,
              /* batch 3 */
                -1.210000f,    4.840000f,  -10.889999f,   19.360001f,  -30.250000f,   43.559998f,
               -59.289997f,   77.440002f,  -98.009995f,  102.010010f, -123.432091f,  146.894394f,
              -172.396896f,  199.939606f, -229.522491f,  261.145599f, -294.808899f,  330.512421f,
              -368.256134f,  408.040039f, -449.864075f,  493.728363f, -539.632874f,  587.577576f,
              -637.562500f,  689.587585f, -743.652954f,  799.758423f,    0.000000f}));

    return LstmUtilsVectorBatchVectorCwiseProductTestImpl<armnn::DataType::Float32>(vector, batchVector,
            vecSize, batchSize, expectedOutput);
}
2637 
LstmUtilsVectorBatchVectorAddTest()2638 void LstmUtilsVectorBatchVectorAddTest()
2639 {
2640     uint32_t batchSize = 2;
2641     uint32_t vecSize = 3;
2642     armnn::TensorInfo vecDesc({vecSize}, armnn::DataType::Float32);
2643     boost::multi_array<float, 1> vector = MakeTensor<float, 1>(vecDesc, std::vector<float>(
2644             { 0.0f, -0.5f, 1.0f}));
2645 
2646     armnn::TensorInfo batchVecDesc({batchSize, vecSize}, armnn::DataType::Float32);
2647     boost::multi_array<float, 2> batchVector = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
2648             { 1.0f, 2.0f, 3.0f,    //batch 0
2649               4.0f, 5.0f, 6.0f})); //batch 1
2650 
2651     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(batchVecDesc, std::vector<float>(
2652             { 1.0f, 1.5f, 4.0f,
2653               4.0f, 4.5f, 7.0f}));
2654 
2655     return LstmUtilsVectorBatchVectorAddTestImpl<armnn::DataType::Float32>(vector, batchVector,
2656             vecSize, batchSize, expectedOutput);
2657 }
2658 
2659 #endif
2660 
LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2661 LayerTestResult<float, 2> LstmLayerFloat32WithCifgWithPeepholeNoProjectionTest(
2662     armnn::IWorkloadFactory& workloadFactory,
2663     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2664     const armnn::ITensorHandleFactory& tensorHandleFactory)
2665 {
2666     armnn::TensorInfo inputDesc({ 2, 2 }, armnn::DataType::Float32);
2667     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2668             { 2., 3., 3., 4. }));
2669 
2670     armnn::TensorInfo outputDesc({ 2, 4 }, armnn::DataType::Float32);
2671     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
2672             {-0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
2673              -0.42734814f, -0.00478661f,  0.13455015f, -0.03560682f}));
2674     return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl<armnn::DataType::Float32>(
2675         workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput);
2676 }
2677 
LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2678 LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionTest(
2679     armnn::IWorkloadFactory& workloadFactory,
2680     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2681     const armnn::ITensorHandleFactory& tensorHandleFactory)
2682 {
2683     armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32);
2684     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2685             {0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
2686              0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f}));
2687 
2688     armnn::TensorInfo outputDesc({ 2, 16 }, armnn::DataType::Float32);
2689     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
2690             {-0.00396806f, 0.029352f,     -0.00279226f, 0.0159977f,   -0.00835576f,
2691              -0.0211779f,  0.0283512f,    -0.0114597f,  0.00907307f,  -0.0244004f,
2692              -0.0152191f,  -0.0259063f,   0.00914318f,  0.00415118f,  0.017147f,
2693              0.0134203f, -0.013869f,    0.0287268f,   -0.00334693f, 0.00733398f,  -0.0287926f,
2694              -0.0186926f,   0.0193662f,   -0.0115437f,  0.00422612f,  -0.0345232f,
2695              0.00223253f,   -0.00957321f, 0.0210624f,   0.013331f,    0.0150954f,
2696              0.02168f}));
2697     return LstmLayerNoCifgWithPeepholeWithProjectionTestImpl<armnn::DataType::Float32>(
2698         workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput);
2699 }
2700 
LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2701 LayerTestResult<float, 2> LstmLayerFloat32NoCifgNoPeepholeNoProjectionTest(
2702     armnn::IWorkloadFactory& workloadFactory,
2703     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2704     const armnn::ITensorHandleFactory& tensorHandleFactory)
2705 {
2706     armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::Float32);
2707     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2708             {2., 3., 3., 4.}));
2709 
2710     armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::Float32);
2711     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
2712             {{-0.02973187f, 0.1229473f,   0.20885126f, -0.15358765f,
2713               -0.0185422f,   0.11281417f,  0.24466537f, -0.1826292f}}));
2714 
2715     return LstmNoCifgNoPeepholeNoProjectionTestImpl<armnn::DataType::Float32>(
2716         workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput);
2717 }
2718 
LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2719 LayerTestResult<float, 2> LstmLayerFloat32NoCifgWithPeepholeWithProjectionWithLayerNormTest(
2720     armnn::IWorkloadFactory& workloadFactory,
2721     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2722     const armnn::ITensorHandleFactory& tensorHandleFactory)
2723 {
2724     armnn::TensorInfo inputDesc({ 2, 5 }, armnn::DataType::Float32);
2725     boost::multi_array<float, 2> input = MakeTensor<float, 2>(inputDesc, std::vector<float>(
2726             {0.7f, 0.8f, 0.1f, 0.2f, 0.3f,     //batch 0
2727              0.3f, 0.2f, 0.9f, 0.8f, 0.1f}));  //batch 1
2728 
2729     armnn::TensorInfo outputDesc({ 2, 3 }, armnn::DataType::Float32);
2730     boost::multi_array<float, 2> expectedOutput = MakeTensor<float, 2>(outputDesc, std::vector<float>(
2731             {  0.0244077f,  0.128027f, -0.00170918f,    //batch 0
2732              -0.00692428f, 0.0848741f,    0.063445f})); //batch 1
2733     return LstmLayerNoCifgWithPeepholeWithProjectionWithLayerNormTestImpl<armnn::DataType::Float32>(
2734         workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput);
2735 }
2736 
LstmLayerInt16NoCifgNoPeepholeNoProjectionTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2737 LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionTest(
2738     armnn::IWorkloadFactory& workloadFactory,
2739     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2740     const armnn::ITensorHandleFactory& tensorHandleFactory)
2741 {
2742     const float qScale = 1.0f;
2743     const int32_t qOffset = 0;
2744 
2745     const armnn::DataType datatype = armnn::DataType::QSymmS16;
2746     const armnn::DataType constantDatatype = armnn::DataType::QAsymmU8;
2747 
2748     armnn::TensorInfo inputDesc({2, 2}, datatype);
2749     boost::multi_array<int16_t , 2> input = MakeTensor<int16_t , 2>(
2750         inputDesc,
2751         armnnUtils::QuantizedVector<int16_t>({ 2.f, 3.f, 3.f, 4.f }, qScale, qOffset));
2752 
2753     armnn::TensorInfo outputDesc({2, 4}, datatype);
2754     boost::multi_array<int16_t, 2> expectedOutput = MakeTensor<int16_t, 2>(
2755         outputDesc,
2756         armnnUtils::QuantizedVector<int16_t>(
2757             {
2758                 -0.02973187f, 0.12294730f, 0.20885126f, -0.15358765f,
2759                 -0.01854220f, 0.11281417f, 0.24466537f, -0.18262920f
2760             },
2761             qScale, qOffset));
2762 
2763     return LstmNoCifgNoPeepholeNoProjectionTestImpl<datatype>(
2764         workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput, qScale, qOffset, constantDatatype);
2765 
2766 }
2767 
LstmLayerInt16WithCifgWithPeepholeNoProjectionTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2768 LayerTestResult<int16_t, 2> LstmLayerInt16WithCifgWithPeepholeNoProjectionTest(
2769     armnn::IWorkloadFactory& workloadFactory,
2770     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2771     const armnn::ITensorHandleFactory& tensorHandleFactory)
2772 {
2773     const float qScale = 1.0f;
2774     const int32_t qOffset = 0;
2775 
2776     const armnn::DataType datatype = armnn::DataType::QSymmS16;
2777     const armnn::DataType constantDatatype = armnn::DataType::QAsymmU8;
2778 
2779     armnn::TensorInfo inputDesc({ 2, 2 }, datatype);
2780     boost::multi_array<int16_t, 2> input =
2781         MakeTensor<int16_t, 2>(
2782             inputDesc,
2783             armnnUtils::QuantizedVector<int16_t>({ 2.f, 3.f, 3.f, 4.f }, qScale, qOffset));
2784 
2785     armnn::TensorInfo outputDesc({ 2, 4 }, datatype);
2786     boost::multi_array<int16_t, 2> expectedOutput =
2787         MakeTensor<int16_t, 2>(
2788             outputDesc,
2789             armnnUtils::QuantizedVector<int16_t>(
2790                 {
2791                     -0.36444446f, -0.00352185f, 0.12886585f, -0.05163646f,
2792                     -0.42734814f, -0.00478661f, 0.13455015f, -0.03560682f
2793                 },
2794                 qScale, qOffset));
2795 
2796     return LstmLayerWithCifgWithPeepholeNoProjectionTestImpl<datatype>(
2797         workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput, qScale, qOffset, constantDatatype);
2798 }
2799 
LstmLayerInt16NoCifgWithPeepholeWithProjectionTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2800 LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgWithPeepholeWithProjectionTest(
2801     armnn::IWorkloadFactory& workloadFactory,
2802     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2803     const armnn::ITensorHandleFactory& tensorHandleFactory)
2804 {
2805     const float qScale = 2.0f;
2806     const int32_t qOffset = 0;
2807 
2808     const armnn::DataType datatype = armnn::DataType::QSymmS16;
2809     const armnn::DataType constantDatatype = armnn::DataType::QAsymmU8;
2810 
2811     armnn::TensorInfo inputDesc({ 2, 5 }, datatype);
2812     boost::multi_array<int16_t, 2> input =
2813         MakeTensor<int16_t, 2>(
2814             inputDesc,
2815             armnnUtils::QuantizedVector<int16_t>(
2816                 {
2817                     0.787926f, 0.151646f, 0.071352f, 0.118426f, 0.458058f,
2818                     0.295743f, 0.544053f, 0.690064f, 0.858138f, 0.497181f
2819                 },
2820                 qScale, qOffset));
2821 
2822     armnn::TensorInfo outputDesc({ 2, 16 }, datatype);
2823     boost::multi_array<int16_t, 2> expectedOutput =
2824         MakeTensor<int16_t, 2>(
2825             outputDesc,
2826             armnnUtils::QuantizedVector<int16_t>(
2827                 {
2828                     -0.00396806f,  0.02935200f, -0.00279226f,  0.01599770f,
2829                     -0.00835576f, -0.02117790f,  0.02835120f, -0.01145970f,
2830                      0.00907307f, -0.02440040f, -0.01521910f, -0.02590630f,
2831                      0.00914318f,  0.00415118f,  0.01714700f,  0.01342030f,
2832                     -0.01386900f,  0.02872680f, -0.00334693f,  0.00733398f,
2833                     -0.02879260f, -0.01869260f,  0.01936620f, -0.01154370f,
2834                      0.00422612f, -0.03452320f,  0.00223253f, -0.00957321f,
2835                      0.02106240f,  0.01333100f,  0.01509540f,  0.02168000f
2836                 },
2837                 qScale, qOffset));
2838 
2839     return LstmLayerNoCifgWithPeepholeWithProjectionTestImpl<datatype>(
2840         workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput, qScale, qOffset, constantDatatype);
2841 }
2842 
LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16ConstantTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2843 LayerTestResult<int16_t, 2> LstmLayerInt16NoCifgNoPeepholeNoProjectionInt16ConstantTest(
2844     armnn::IWorkloadFactory& workloadFactory,
2845     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2846     const armnn::ITensorHandleFactory& tensorHandleFactory)
2847 {
2848     const float qScale = 1.0f;
2849     const int32_t qOffset = 0;
2850 
2851     const armnn::DataType datatype = armnn::DataType::QSymmS16; // datatype & constants set to QSymm16
2852 
2853     armnn::TensorInfo inputDesc({2, 2}, datatype);
2854     boost::multi_array<int16_t , 2> input =
2855         MakeTensor<int16_t , 2>(inputDesc,
2856                                 armnnUtils::QuantizedVector<int16_t>({ 2.f, 3.f, 3.f, 4.f }, qScale, qOffset));
2857 
2858     armnn::TensorInfo outputDesc({2, 4}, datatype);
2859     boost::multi_array<int16_t, 2> expectedOutput =
2860         MakeTensor<int16_t, 2>(
2861             outputDesc,
2862             armnnUtils::QuantizedVector<int16_t>(
2863                 {
2864                     -0.02973187f, 0.12294730f, 0.20885126f, -0.15358765f,
2865                     -0.01854220f, 0.11281417f, 0.24466537f, -0.18262920f
2866                 },
2867                 qScale, qOffset));
2868 
2869     return LstmNoCifgNoPeepholeNoProjectionTestImpl<datatype>(
2870         workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput, qScale, qOffset, datatype);
2871 }
2872 
2873 //
2874 // QuantizedLstm
2875 //
2876 
QuantizedLstmTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2877 LayerTestResult<uint8_t, 2> QuantizedLstmTest(
2878     armnn::IWorkloadFactory& workloadFactory,
2879     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2880     const armnn::ITensorHandleFactory& tensorHandleFactory)
2881 {
2882     armnn::TensorInfo inputDesc({2, 2}, armnn::DataType::QAsymmU8);
2883     boost::multi_array<uint8_t, 2> input = MakeTensor<uint8_t, 2>(inputDesc, std::vector<uint8_t>(
2884         {166, 179, 50, 150}));
2885 
2886     armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::QAsymmU8);
2887     boost::multi_array<uint8_t, 2> expectedOutput = MakeTensor<uint8_t, 2>(outputDesc, std::vector<uint8_t>(
2888         {140, 151, 146, 112, 136, 156, 142, 112 }));
2889 
2890     return QuantizedLstmTestImpl(workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput);
2891 }
2892 
//
// QLSTM
//
QLstmTest(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2894 LayerTestResult<int8_t, 2> QLstmTest(
2895     armnn::IWorkloadFactory& workloadFactory,
2896     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2897     const armnn::ITensorHandleFactory& tensorHandleFactory)
2898 {
2899     armnn::TensorInfo inputDesc({2, 5}, armnn::DataType::QAsymmS8);
2900     boost::multi_array<int8_t, 2> input = MakeTensor<int8_t, 2>(inputDesc, std::vector<int8_t>(
2901             {90, 102, 13, 26, 38, 102, 13, 26, 51, 64}));
2902 
2903     armnn::TensorInfo outputDesc({2, 4}, armnn::DataType::QAsymmS8);
2904     boost::multi_array<int8_t, 2> expectedOutput = MakeTensor<int8_t, 2>(outputDesc, std::vector<int8_t>(
2905             {-15, 21, 14, 20, -15, 15, 5, 27}));
2906 
2907     return QLstmTestImpl(workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput);
2908 }
2909 
QLstmTest1(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2910 LayerTestResult<int8_t, 2> QLstmTest1(
2911     armnn::IWorkloadFactory& workloadFactory,
2912     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2913     const armnn::ITensorHandleFactory& tensorHandleFactory)
2914 {
2915     armnn::TensorInfo inputDesc({2, 5}, armnn::DataType::QAsymmS8);
2916     boost::multi_array<int8_t, 2> input = MakeTensor<int8_t, 2>(inputDesc, std::vector<int8_t>(
2917             {90, 102, 13, 26, 38, 102, 13, 26, 51, 64}));
2918 
2919     armnn::TensorInfo outputDesc({2, 3}, armnn::DataType::QAsymmS8);
2920     boost::multi_array<int8_t, 2> expectedOutput = MakeTensor<int8_t, 2>(outputDesc, std::vector<int8_t>(
2921             {127, 127, -108, -67, 127, 127}));
2922 
2923     return QLstmTestImpl1(workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput);
2924 }
2925 
QLstmTest2(armnn::IWorkloadFactory & workloadFactory,const armnn::IBackendInternal::IMemoryManagerSharedPtr & memoryManager,const armnn::ITensorHandleFactory & tensorHandleFactory)2926 LayerTestResult<int8_t, 2> QLstmTest2(
2927     armnn::IWorkloadFactory& workloadFactory,
2928     const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
2929     const armnn::ITensorHandleFactory& tensorHandleFactory)
2930 {
2931     armnn::TensorInfo inputDesc({2, 5}, armnn::DataType::QAsymmS8);
2932     boost::multi_array<int8_t, 2> input = MakeTensor<int8_t, 2>(inputDesc, std::vector<int8_t>(
2933             {90, 102, 13, 26, 38, 102, 13, 26, 51, 64}));
2934 
2935     armnn::TensorInfo outputDesc({2, 3}, armnn::DataType::QAsymmS8);
2936     boost::multi_array<int8_t, 2> expectedOutput = MakeTensor<int8_t, 2>(outputDesc, std::vector<int8_t>(
2937             {127, 127, 127, -128, 127, 127}));
2938 
2939     return QLstmTestImpl2(workloadFactory, memoryManager, tensorHandleFactory, input, expectedOutput);
2940 }