1 /*
2  * Copyright (c) 2017-2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
25 #define ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
26 
27 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
28 #include "tests/framework/Fixture.h"
29 #include "tests/validation/Validation.h"
30 #include "tests/validation/reference/GEMMLowp.h"
31 
32 namespace arm_compute
33 {
34 namespace test
35 {
36 namespace validation
37 {
38 namespace
39 {
40 template <typename U>
41 void fill(U &&tensor, int i)
42 {
43     switch(tensor.data_type())
44     {
45         case DataType::QSYMM8_PER_CHANNEL:
46         {
47             int min_bound = 128;
48             int max_bound = -127;
49             for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++)
50             {
51                 std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i);
52                 if(bounds.first < min_bound)
53                 {
54                     min_bound = bounds.first;
55                 }
56                 if(bounds.second > max_bound)
57                 {
58                     max_bound = bounds.second;
59                 }
60             }
61             std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound);
62             library->fill(tensor, distribution, i);
63             break;
64         }
65         case DataType::QASYMM8:
66         {
67             std::uniform_int_distribution<uint32_t> distribution(1, 254);
68             library->fill(tensor, distribution, i);
69             break;
70         }
71         case DataType::S32:
72         {
73             std::uniform_int_distribution<int32_t> distribution(-20000, 20000);
74             library->fill(tensor, distribution, i);
75             break;
76         }
77         case DataType::F16:
78         {
79             arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
80             library->fill(tensor, distribution, i);
81             break;
82         }
83         case DataType::F32:
84         {
85             std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
86             library->fill(tensor, distribution, i);
87             break;
88         }
89         default:
90             library->fill_tensor_uniform(tensor, i);
91     }
92 }
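// Note: the integer index passed to fill() is forwarded to the library as a seed offset, so each tensor
// filled within a single test case receives a distinct pseudo-random pattern.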
93 
94 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false>
95 TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
96                                    GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
97                                    QuantizationInfo b_qinfo = QuantizationInfo(), bool reshape_b_only_on_first_run = false)
98 {
99     // Create tensors
100     DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
101 
102     TensorType a      = create_tensor<TensorType>(shape_a, data_type_a, 1);
103     TensorType b      = create_tensor<TensorType>(shape_b, data_type_b, 1); // GEMM output before the output stage mismatches if data_layout_output is passed here; to be investigated
104     TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1);
105 
106     a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
107 
108     if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
109     {
110         b.info()->set_quantization_info(b_qinfo);
111     }
112     else
113     {
114         b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
115     }
116     TensorType bias;
117     if(is_fused)
118     {
119         TensorShape bias_shape(shape_b[0]);
120         bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1);
121     }
122 
123     // Create and configure function
124     // The GEMMInfo includes the depth values in case of reinterpreted 3D input/output
125     FunctionType gemmlowp;
126     gemmlowp.configure(&a, &b, is_fused ? &bias : nullptr, &output, GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false,
127                                                                              output_stage));
128 
129     ARM_COMPUTE_ASSERT(a.info()->is_resizable());
130     ARM_COMPUTE_ASSERT(b.info()->is_resizable());
131     ARM_COMPUTE_ASSERT(output.info()->is_resizable());
132 
133     add_padding_x({ &a, &b, &output });
134 
135     // Allocate tensors
136     a.allocator()->allocate();
137     b.allocator()->allocate();
138     output.allocator()->allocate();
139 
140     ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
141     ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
142     ARM_COMPUTE_ASSERT(!output.info()->is_resizable());
143 
144     // Fill tensors
145     fill(AccessorType(a), 0);
146     fill(AccessorType(b), 1);
147 
148     if(is_fused)
149     {
150         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
151         bias.allocator()->allocate();
152         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
153         fill(AccessorType(bias), 2);
154     }
155 
156     // Run with variable inputs.
157     if(run_twice)
158     {
159         gemmlowp.run();
160         fill(AccessorType(a), 3); // Fill tensors with new seed after run
161         fill(AccessorType(b), 4);
162         if(is_fused)
163         {
164             fill(AccessorType(bias), 5);
165         }
166     }
167 
168     // Compute GEMM function
169     gemmlowp.run();
170     return output;
171 }
172 
173 template <bool reinterpret_input_as_3d, typename TI = uint8_t, typename TW = uint8_t, bool pretranspose_A = false, bool pretranspose_B = false, bool run_twice = false>
174 SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
175                                                  DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo())
176 {
177     TensorShape shape_a_to_use = shape_a;
178     if(reinterpret_input_as_3d)
179     {
180         // Collapse the second and third dimension if the input is 3D
181         shape_a_to_use.collapse(2U, 1U);
182     }
183 
184     // Create reference
185     SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1 };
186     SimpleTensor<TW> b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
187 
188     TensorShape shape_a_to_use_transposed{ shape_a_to_use };
189     TensorShape shape_b_transposed{ shape_b };
190 
191     shape_a_to_use_transposed.set(0, shape_a_to_use[1]);
192     shape_a_to_use_transposed.set(1, shape_a_to_use[0]);
193     shape_b_transposed.set(0, shape_b[1]);
194     shape_b_transposed.set(1, shape_b[0]);
195 
196     SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1 };
197     SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
198 
199     // Fill reference
200     fill(a, 0);
201     fill(b, 1);
202 
203     // Transpose reference if required
204     /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N): if pretranspose_A is set to true, then A is assumed to be (B x K x M) and
205        must be pre-transposed before being passed to the fixture. The fixture then transposes A back to (B x M x K)
206        so that the reference implementation, which works with (B x M x K) input, can be called.
207        Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K) and must be pre-transposed before being passed to the fixture. */
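    /* For example (illustrative numbers only): with B = 2, M = 4, K = 8 and pretranspose_A = true, the caller provides A with
       shape (2 x 8 x 4) and the fixture transposes it back to (2 x 4 x 8) before invoking the reference implementation. */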
208     if(pretranspose_A)
209     {
210         transpose_matrix<TI>(a, a_transposed);
211     }
212 
213     if(pretranspose_B)
214     {
215         transpose_matrix<TW>(b, b_transposed);
216     }
217 
218     // Run with variable inputs.
219     if(run_twice)
220     {
221         reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
222         fill((pretranspose_A) ? a_transposed : a, 3);
223         fill((pretranspose_B) ? b_transposed : b, 4);
224     }
225 
226     return reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
227 }
228 }
229 
230 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
231 class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture
232 {
233 public:
234     template <typename...>
235     void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
236     {
237         _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset);
238         _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset);
239     }
240 
241 protected:
242     TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
243     {
244         return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_offset,
245                 b_offset);
246     }
247 
248     SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
249     {
250         return compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_offset, b_offset);
251     }
252 
253     TensorType            _target{};
254     SimpleTensor<int32_t> _reference{};
255 };
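// Illustrative usage (a sketch, not part of the original header): validation test files typically alias such a fixture
// for a backend, e.g. GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore> for
// Neon, and drive it through the framework's FIXTURE_DATA_TEST_CASE macros; the exact type names here are assumptions.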
256 
257 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false>
258 class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture : public framework::Fixture
259 {
260 public:
261     template <typename...>
262     void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b,
263                bool reshape_b_only_on_first_run)
264     {
265         ARM_COMPUTE_ASSERT(output_stage.type != GEMMLowpOutputStageType::NONE);
266         DataType data_type_a = data_type_b == DataType::QASYMM8_SIGNED ? DataType::QASYMM8_SIGNED : DataType::QASYMM8;
267 
268         if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
269         {
270             output_stage.is_quantized_per_channel              = true;
271             const size_t                          num_channels = shape_b[0];
272             std::vector<float>                    scales(num_channels);
273             std::uniform_real_distribution<float> distribution(0.f, 1.f);
274             library->fill(scales, distribution, 0);
275             output_stage.gemmlowp_multipliers.resize(num_channels);
276             output_stage.gemmlowp_shifts.resize(num_channels);
277             for(size_t i = 0; i < num_channels; ++i)
278             {
279                 quantization::calculate_quantized_multiplier(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]);
280             }
281 
282             _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales));
283             _target    = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales), reshape_b_only_on_first_run);
284         }
285         else
286         {
287             _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo());
288             _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo(), reshape_b_only_on_first_run);
289         }
290     }
291 
292 protected:
293     TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage,
294                               DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo, bool reshape_b_only_on_first_run = false)
295     {
296         return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true, run_twice>(shape_a, shape_b, shape_output, a_offset,
297                 b_offset,
298                 output_stage, data_type_a, data_type_b, b_qinfo, reshape_b_only_on_first_run);
299     }
300 
301     SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
302                                        GEMMLowpOutputStageInfo output_stage, DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo)
303     {
304         SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TI, TW, false, false, run_twice>(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_a, data_type_b,
305                                                                                                                             b_qinfo);
306 
307         TensorShape           bias_shape(shape_b[0]);
308         SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
309         (run_twice) ? fill(bias, 5) : fill(bias, 2); // Fill bias with same seed as last run of gemmlowp_target
310 
311         switch(output_stage.type)
312         {
313             case GEMMLowpOutputStageType::QUANTIZE_DOWN:
314                 return reference::gemmlowp_quantize_down_scale<int32_t, TW>(output, bias,
315                                                                             output_stage.gemmlowp_offset, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
316                 break;
317             case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT:
318                 return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TW>(output, bias,
319                                                                                           output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
320                 break;
321             default:
322                 ARM_COMPUTE_ERROR("Not Supported!");
323         }
324     }
325 
326     TensorType       _target{};
327     SimpleTensor<TI> _reference{};
328 };
329 
330 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t>
331 class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public
332     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW>
333 {
334 public:
335     template <typename...>
336     void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b)
337     {
338         GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW>::setup(shape_a, shape_b,
339                 shape_output, a_offset, b_offset, output_stage, data_type_b, false);
340     }
341 };
342 
343 template <typename TensorType, typename AccessorType, typename FunctionType>
344 class GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture : public framework::Fixture
345 {
346 public:
347     template <typename...>
348     void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
349     {
350         _target    = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
351         _reference = compute_reference(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
352     }
353 
354 protected:
355     template <typename U>
356     void fill(U &&tensor, int i)
357     {
358         std::uniform_int_distribution<> distribution(-6000, 6000);
359         library->fill(tensor, distribution, i);
360     }
361 
362     TensorType compute_target(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
363     {
364         TensorShape shape_bias(shape[0]);
365 
366         // Create tensors
367         TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
368         TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
369         TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8, 1);
370 
371         // Create and configure function
372         FunctionType            output_stage;
373         GEMMLowpOutputStageInfo output_stage_info = GEMMLowpOutputStageInfo();
374         output_stage_info.type                    = GEMMLowpOutputStageType::QUANTIZE_DOWN;
375         output_stage_info.gemmlowp_offset         = result_offset;
376         output_stage_info.gemmlowp_multiplier     = result_mult_int;
377         output_stage_info.gemmlowp_shift          = result_shift;
378         output_stage_info.gemmlowp_min_bound      = min;
379         output_stage_info.gemmlowp_max_bound      = max;
380         output_stage_info.output_data_type        = DataType::QASYMM8;
381         output_stage.configure(&a, add_bias ? &b : nullptr, &c, output_stage_info);
382 
383         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
384         ARM_COMPUTE_ASSERT(c.info()->is_resizable());
385 
386         // Allocate tensors
387         a.allocator()->allocate();
388         c.allocator()->allocate();
389 
390         ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
391         ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
392 
393         // Fill tensor
394         fill(AccessorType(a), 0);
395 
396         if(add_bias)
397         {
398             ARM_COMPUTE_ASSERT(b.info()->is_resizable());
399 
400             // Allocate bias tensor
401             b.allocator()->allocate();
402 
403             ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
404 
405             // Fill tensor
406             fill(AccessorType(b), 1);
407         }
408 
409         // Compute GEMM function
410         output_stage.run();
411         return c;
412     }
413 
414     SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
415     {
416         // Create reference
417         TensorShape shape_bias(shape[0]);
418 
419         SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
420         SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
421 
422         // Fill reference
423         fill(a, 0);
424 
425         const std::vector<int32_t> result_mult_int_vec = { result_mult_int };
426         const std::vector<int32_t> result_shift_vec    = { result_shift };
427 
428         if(add_bias)
429         {
430             // Fill bias
431             fill(b, 1);
432 
433             return reference::gemmlowp_quantize_down_scale<int32_t, uint8_t>(a, b, result_offset, result_mult_int_vec, result_shift_vec, min, max);
434         }
435         else
436         {
437             return reference::gemmlowp_quantize_down_scale<int32_t, uint8_t>(a, result_offset, result_mult_int_vec, result_shift_vec, min, max);
438         }
439     }
440 
441     TensorType            _target{};
442     SimpleTensor<uint8_t> _reference{};
443 };
444 
445 template <typename TensorType, typename AccessorType, typename FunctionType>
446 class GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture : public framework::Fixture
447 {
448 public:
449     template <typename...>
450     void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
451     {
452         _target    = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
453         _reference = compute_reference(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
454     }
455 
456 protected:
457     template <typename U>
458     void fill(U &&tensor, int i)
459     {
460         std::uniform_int_distribution<> distribution(-6000, 6000);
461         library->fill(tensor, distribution, i);
462     }
463 
464     TensorType compute_target(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
465     {
466         TensorShape shape_bias(shape[0]);
467 
468         // Create tensors
469         TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
470         TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
471         TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8_SIGNED, 1);
472 
473         // Create and configure function
474         FunctionType            output_stage;
475         GEMMLowpOutputStageInfo output_stage_info = GEMMLowpOutputStageInfo();
476         output_stage_info.type                    = GEMMLowpOutputStageType::QUANTIZE_DOWN;
477         output_stage_info.gemmlowp_offset         = result_offset;
478         output_stage_info.gemmlowp_multiplier     = result_mult_int;
479         output_stage_info.gemmlowp_shift          = result_shift;
480         output_stage_info.gemmlowp_min_bound      = min;
481         output_stage_info.gemmlowp_max_bound      = max;
482         output_stage_info.output_data_type        = DataType::QASYMM8_SIGNED;
483         output_stage.configure(&a, add_bias ? &b : nullptr, &c, output_stage_info);
484 
485         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
486         ARM_COMPUTE_ASSERT(c.info()->is_resizable());
487 
488         // Allocate tensors
489         a.allocator()->allocate();
490         c.allocator()->allocate();
491 
492         ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
493         ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
494 
495         // Fill tensor
496         fill(AccessorType(a), 0);
497 
498         if(add_bias)
499         {
500             ARM_COMPUTE_ASSERT(b.info()->is_resizable());
501 
502             // Allocate bias tensor
503             b.allocator()->allocate();
504 
505             ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
506 
507             // Fill tensor
508             fill(AccessorType(b), 1);
509         }
510 
511         // Compute GEMM function
512         output_stage.run();
513         return c;
514     }
515 
516     SimpleTensor<int8_t> compute_reference(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
517     {
518         // Create reference
519         TensorShape shape_bias(shape[0]);
520 
521         SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
522         SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
523 
524         // Fill reference
525         fill(a, 0);
526 
527         const std::vector<int32_t> result_mult_int_vec = { result_mult_int };
528         const std::vector<int32_t> result_shift_vec    = { result_shift };
529 
530         if(add_bias)
531         {
532             // Fill bias
533             fill(b, 1);
534 
535             return reference::gemmlowp_quantize_down_scale<int32_t, int8_t>(a, b, result_offset, result_mult_int_vec, result_shift_vec, min, max);
536         }
537         else
538         {
539             return reference::gemmlowp_quantize_down_scale<int32_t, int8_t>(a, result_offset, result_mult_int_vec, result_shift_vec, min, max);
540         }
541     }
542 
543     TensorType           _target{};
544     SimpleTensor<int8_t> _reference{};
545 };
546 
547 template <typename TensorType, typename AccessorType, typename FunctionType>
548 class GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture : public framework::Fixture
549 {
550 public:
551     template <typename...>
552     void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
553     {
554         _target    = compute_target(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
555         _reference = compute_reference(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
556     }
557 
558 protected:
559     template <typename U>
560     void fill(U &&tensor, int i)
561     {
562         std::uniform_int_distribution<> distribution(-6000, 6000);
563         library->fill(tensor, distribution, i);
564     }
565 
566     TensorType compute_target(const TensorShape &shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
567     {
568         TensorShape shape_bias(shape[0]);
569 
570         // Create tensors
571         TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
572         TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
573         TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8_SIGNED, 1);
574 
575         // Create and configure function
576         FunctionType output_stage;
577         output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
578 
579         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
580         ARM_COMPUTE_ASSERT(c.info()->is_resizable());
581 
582         // Allocate tensors
583         a.allocator()->allocate();
584         c.allocator()->allocate();
585 
586         ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
587         ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
588 
589         // Fill tensor
590         fill(AccessorType(a), 0);
591 
592         if(add_bias)
593         {
594             ARM_COMPUTE_ASSERT(b.info()->is_resizable());
595 
596             // Allocate bias tensor
597             b.allocator()->allocate();
598 
599             ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
600 
601             // Fill tensor
602             fill(AccessorType(b), 1);
603         }
604 
605         // Compute GEMM function
606         output_stage.run();
607         return c;
608     }
609 
610     SimpleTensor<int8_t> compute_reference(const TensorShape &shape, int32_t result_fixed_point_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max,
611                                            bool add_bias)
612     {
613         // Create reference
614         TensorShape shape_bias(shape[0]);
615 
616         SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
617         SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
618 
619         // Fill reference
620         fill(a, 0);
621 
622         const std::vector<int32_t> result_fixed_point_multiplier_vec = { result_fixed_point_multiplier };
623         const std::vector<int32_t> result_shift_vec                  = { result_shift };
624 
625         if(add_bias)
626         {
627             // Fill bias
628             fill(b, 1);
629 
630             return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, int8_t>(a, b, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
631         }
632         else
633         {
634             return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, int8_t>(a, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
635         }
636     }
637 
638     TensorType           _target{};
639     SimpleTensor<int8_t> _reference{};
640 };
641 
642 template <typename TensorType, typename AccessorType, typename FunctionType>
643 class GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture : public framework::Fixture
644 {
645 public:
646     template <typename...>
647     void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
648     {
649         _target    = compute_target(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
650         _reference = compute_reference(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
651     }
652 
653 protected:
654     template <typename U>
655     void fill(U &&tensor, int i)
656     {
657         std::uniform_int_distribution<> distribution(-6000, 6000);
658         library->fill(tensor, distribution, i);
659     }
660 
661     TensorType compute_target(const TensorShape &shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
662     {
663         TensorShape shape_bias(shape[0]);
664 
665         // Create tensors
666         TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
667         TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
668         TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8, 1);
669 
670         // Create and configure function
671         FunctionType output_stage;
672         output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
673 
674         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
675         ARM_COMPUTE_ASSERT(c.info()->is_resizable());
676 
677         // Allocate tensors
678         a.allocator()->allocate();
679         c.allocator()->allocate();
680 
681         ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
682         ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
683 
684         // Fill tensor
685         fill(AccessorType(a), 0);
686 
687         if(add_bias)
688         {
689             ARM_COMPUTE_ASSERT(b.info()->is_resizable());
690 
691             // Allocate bias tensor
692             b.allocator()->allocate();
693 
694             ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
695 
696             // Fill tensor
697             fill(AccessorType(b), 1);
698         }
699 
700         // Compute GEMM function
701         output_stage.run();
702         return c;
703     }
704 
705     SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, int32_t result_fixed_point_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max,
706                                             bool add_bias)
707     {
708         // Create reference
709         TensorShape shape_bias(shape[0]);
710 
711         SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
712         SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
713 
714         // Fill reference
715         fill(a, 0);
716 
717         const std::vector<int32_t> result_fixed_point_multiplier_vec = { result_fixed_point_multiplier };
718         const std::vector<int32_t> result_shift_vec                  = { result_shift };
719 
720         if(add_bias)
721         {
722             // Fill bias
723             fill(b, 1);
724 
725             return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, uint8_t>(a, b, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
726         }
727         else
728         {
729             return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, uint8_t>(a, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
730         }
731     }
732 
733     TensorType            _target{};
734     SimpleTensor<uint8_t> _reference{};
735 };
736 
737 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
738 class GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture : public framework::Fixture
739 {
740 public:
741     template <typename...>
742     void setup(DataType data_type, TensorShape shape, float result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
743     {
744         _target    = compute_target(data_type, shape, result_real_multiplier, result_offset, min, max, add_bias);
745         _reference = compute_reference(shape, result_real_multiplier, result_offset, min, max, add_bias);
746     }
747 
748 protected:
749     template <typename U>
750     void fill(U &&tensor, int i)
751     {
752         // To avoid all of the data being clamped
753         std::uniform_int_distribution<> distribution(-500, 500);
754         library->fill(tensor, distribution, i);
755     }
756 
757     TensorType compute_target(DataType data_type, const TensorShape &shape, float result_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
758     {
759         TensorShape shape_bias(shape[0]);
760 
761         // Create tensors
762         TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
763         TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
764         TensorType c = create_tensor<TensorType>(shape, data_type, 1);
765 
766         // create output stage info
767         GEMMLowpOutputStageInfo info;
768         info.gemmlowp_max_bound       = max;
769         info.gemmlowp_min_bound       = min;
770         info.gemmlowp_real_multiplier = result_multiplier;
771         info.gemmlowp_offset          = result_offset;
772         info.type                     = GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT;
773         info.output_data_type         = data_type;
774 
775         // Create and configure function
776         FunctionType output_stage;
777         output_stage.configure(&a, add_bias ? &b : nullptr, &c, info);
778 
779         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
780         ARM_COMPUTE_ASSERT(c.info()->is_resizable());
781 
782         // Allocate tensors
783         a.allocator()->allocate();
784         c.allocator()->allocate();
785 
786         ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
787         ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
788 
789         // Fill tensor
790         fill(AccessorType(a), 0);
791 
792         if(add_bias)
793         {
794             ARM_COMPUTE_ASSERT(b.info()->is_resizable());
795 
796             // Allocate bias tensor
797             b.allocator()->allocate();
798 
799             ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
800 
801             // Fill tensor
802             fill(AccessorType(b), 1);
803         }
804 
805         // Compute GEMM function
806         output_stage.run();
807         return c;
808     }
809 
810     SimpleTensor<T> compute_reference(const TensorShape &shape, float_t result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
811     {
812         // Create reference
813         TensorShape shape_bias(shape[0]);
814 
815         SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
816         SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
817 
818         // Fill reference
819         fill(a, 0);
820 
821         const std::vector<float_t> result_float_multiplier_vec = { result_real_multiplier };
822 
823         if(add_bias)
824         {
825             // Fill bias
826             fill(b, 1);
827 
828             return reference::gemmlowp_quantize_down_scale_by_float<int32_t, T>(a, b, result_float_multiplier_vec, result_offset, min, max);
829         }
830         else
831         {
832             return reference::gemmlowp_quantize_down_scale_by_float<int32_t, T>(a, result_float_multiplier_vec, result_offset, min, max);
833         }
834     }
835 
836     TensorType      _target{};
837     SimpleTensor<T> _reference{};
838 };
839 
840 template <typename TensorType, typename AccessorType, typename FunctionType>
841 class GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture : public framework::Fixture
842 {
843 public:
844     template <typename...>
845     void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
846     {
847         _target    = compute_target(shape, result_fixedpoint_multiplier, result_shift, min, max, add_bias);
848         _reference = compute_reference(shape, result_fixedpoint_multiplier, result_shift, min, max, add_bias);
849     }
850 
851 protected:
852     template <typename U>
853     void fill(U &&tensor, int i)
854     {
855         std::uniform_int_distribution<> distribution(-6000, 6000);
856         library->fill(tensor, distribution, i);
857     }
858 
859     TensorType compute_target(const TensorShape &shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
860     {
861         TensorShape shape_bias(shape[0]);
862 
863         // Create tensors
864         TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
865         TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
866         TensorType c = create_tensor<TensorType>(shape, DataType::QSYMM16, 1);
867 
868         // Create and configure function
869         FunctionType output_stage;
870         output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, min, max);
871 
872         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
873         ARM_COMPUTE_ASSERT(c.info()->is_resizable());
874 
875         // Allocate tensors
876         a.allocator()->allocate();
877         c.allocator()->allocate();
878 
879         ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
880         ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
881 
882         // Fill tensor
883         fill(AccessorType(a), 0);
884 
885         if(add_bias)
886         {
887             ARM_COMPUTE_ASSERT(b.info()->is_resizable());
888 
889             // Allocate bias tensor
890             b.allocator()->allocate();
891 
892             ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
893 
894             // Fill tensor
895             fill(AccessorType(b), 1);
896         }
897 
898         // Compute GEMM function
899         output_stage.run();
900         return c;
901     }
902 
903     SimpleTensor<int16_t> compute_reference(const TensorShape &shape, int32_t result_fixed_point_multiplier, int32_t result_shift, int32_t min, int32_t max,
904                                             bool add_bias)
905     {
906         // Create reference
907         TensorShape shape_bias(shape[0]);
908 
909         SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
910         SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
911 
912         // Fill reference
913         fill(a, 0);
914 
915         const std::vector<int32_t> result_fixed_point_multiplier_vec = { result_fixed_point_multiplier };
916         const std::vector<int32_t> result_shift_vec                  = { result_shift };
917 
918         if(add_bias)
919         {
920             // Fill bias
921             fill(b, 1);
922 
923             return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, int16_t>(a, b, result_fixed_point_multiplier_vec, result_shift_vec, 0, min, max);
924         }
925         else
926         {
927             return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, int16_t>(a, result_fixed_point_multiplier_vec, result_shift_vec, 0, min, max);
928         }
929     }
930 
931     TensorType            _target{};
932     SimpleTensor<int16_t> _reference{};
933 };
934 
935 template <typename TensorType, typename AccessorType, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
936 class GEMMLowpMatrixMultiplyReshapedValidationFixture : public framework::Fixture
937 {
938 public:
939     template <typename...>
940     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
941                bool interleave_rhs, DataType data_type)
942     {
943         GEMMLHSMatrixInfo lhs_info;
944         lhs_info.m0         = m0;
945         lhs_info.k0         = k0;
946         lhs_info.v0         = v0;
947         lhs_info.interleave = interleave_lhs;
948         lhs_info.transpose  = false;
949 
950         GEMMRHSMatrixInfo rhs_info;
951         rhs_info.n0         = n0;
952         rhs_info.k0         = k0;
953         rhs_info.h0         = h0;
954         rhs_info.interleave = interleave_rhs;
955         rhs_info.transpose  = true;
956 
957         // Set the tensor shapes for LHS and RHS matrices
958         const TensorShape lhs_shape(k, m, batch_size);
959         const TensorShape rhs_shape(n, k, batch_size);
960 
961         _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type);
962         _reference = compute_reference(lhs_shape, rhs_shape, data_type);
963     }
964 
965 protected:
966     template <typename U>
967     void fill(U &&tensor, int i)
968     {
969         switch(tensor.data_type())
970         {
971             case DataType::QASYMM8:
972             {
973                 // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
974                 std::uniform_int_distribution<> distribution(1, 254);
975                 library->fill(tensor, distribution, i);
976             }
977             break;
978             case DataType::QASYMM8_SIGNED:
979             {
980                 std::uniform_int_distribution<> distribution(-127, 126);
981                 library->fill(tensor, distribution, i);
982             }
983             break;
984             default:
985                 ARM_COMPUTE_ERROR("Unsupported data type");
986         }
987     }
988 
989     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type)
990     {
991         // Create tensors
992         TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
993         TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
994         TensorType lhs_reshaped;
995         TensorType rhs_reshaped;
996         TensorType dst;
997 
998         const unsigned int M = lhs_shape[1];
999         const unsigned int N = rhs_shape[0];
1000         const unsigned int K = lhs_shape[0];
1001 
1002         // The output tensor will be auto-initialized within the function
1003 
1004         // Create and configure function
1005         ReshapeLHSOperatorType reshape_lhs;
1006         ReshapeRHSOperatorType reshape_rhs;
1007         GEMMFunctionType       gemm;
1008         reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
1009         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1010         gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));
1011 
1012         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1013         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1014 
1015         add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &dst });
1016 
1017         // Allocate tensors
1018         lhs.allocator()->allocate();
1019         rhs.allocator()->allocate();
1020         lhs_reshaped.allocator()->allocate();
1021         rhs_reshaped.allocator()->allocate();
1022         dst.allocator()->allocate();
1023 
1024         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1025         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1026         ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
1027         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1028         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1029 
1030         // Fill tensors
1031         fill(AccessorType(lhs), 0);
1032         fill(AccessorType(rhs), 1);
1033 
1034         // Compute GEMM
1035         ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
1036         reshape_lhs.run(reshape_lhs_pack);
1037         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1038         reshape_rhs.run(reshape_rhs_pack);
1039         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
1040         gemm.run(gemm_pack);
1041 
1042         return dst;
1043     }
1044 
1045     SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type)
1046     {
1047         TensorShape dst_shape = lhs_shape;
1048         dst_shape[0]          = rhs_shape[0];
1049         dst_shape[1]          = lhs_shape[1];
1050 
1051         switch(data_type)
1052         {
1053             case DataType::QASYMM8:
1054             {
1055                 // Create reference
1056                 SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
1057                 SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
1058 
1059                 // Fill reference
1060                 fill(lhs, 0);
1061                 fill(rhs, 1);
1062 
1063                 return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
1064             }
1065             case DataType::QASYMM8_SIGNED:
1066             {
1067                 // Create reference
1068                 SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
1069                 SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };
1070 
1071                 // Fill reference
1072                 fill(lhs, 0);
1073                 fill(rhs, 1);
1074 
1075                 return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
1076             }
1077             default:
1078                 ARM_COMPUTE_ERROR("Unsupported data type");
1079         }
1080     }
1081 
1082     TensorType            _target{};
1083     SimpleTensor<int32_t> _reference{};
1084 };
1085 
1086 template <typename TensorType, typename AccessorType, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
1087 class GEMMLowpMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
1088 {
1089 public:
1090     template <typename...>
1091     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
1092                bool interleave_lhs, bool interleave_rhs, DataType data_type)
1093     {
1094         GEMMLHSMatrixInfo lhs_info;
1095         lhs_info.m0         = m0;
1096         lhs_info.k0         = k0;
1097         lhs_info.v0         = v0;
1098         lhs_info.interleave = interleave_lhs;
1099         lhs_info.transpose  = false;
1100 
1101         GEMMRHSMatrixInfo rhs_info;
1102         rhs_info.n0         = n0;
1103         rhs_info.k0         = k0;
1104         rhs_info.h0         = h0;
1105         rhs_info.interleave = interleave_rhs;
1106         rhs_info.transpose  = true;
1107 
1108         // In case of GEMM3D, m is the product between m_w and m_h
1109         const unsigned int m = m_w * m_h;
1110 
1111         // Set the tensor shapes for LHS and RHS matrices
1112         const TensorShape lhs_shape(k, m, batch_size);
1113         const TensorShape rhs_shape(n, k, batch_size);
1114 
1115         _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, m_h, data_type);
1116         _reference = compute_reference(lhs_shape, rhs_shape, m_h, data_type);
1117     }
1118 
1119 protected:
1120     template <typename U>
1121     void fill(U &&tensor, int i)
1122     {
1123         switch(tensor.data_type())
1124         {
1125             case DataType::QASYMM8:
1126             {
1127                 // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
1128                 std::uniform_int_distribution<> distribution(1, 254);
1129                 library->fill(tensor, distribution, i);
1130             }
1131             break;
1132             case DataType::QASYMM8_SIGNED:
1133             {
1134                 std::uniform_int_distribution<> distribution(-127, 126);
1135                 library->fill(tensor, distribution, i);
1136             }
1137             break;
1138             default:
1139                 ARM_COMPUTE_ERROR("Unsupported data type");
1140         }
1141     }
1142 
1143     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, unsigned int m_h,
1144                               DataType data_type)
1145     {
1146         // Create tensors
1147         TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1148         TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1149         TensorType lhs_reshaped;
1150         TensorType rhs_reshaped;
1151         TensorType dst;
1152 
1153         const unsigned int M = lhs_shape[1];
1154         const unsigned int N = rhs_shape[0];
1155         const unsigned int K = lhs_shape[0];
1156 
1157         // The output tensor will be auto-initialized within the function
1158 
1159         // Create and configure function
1160         ReshapeLHSOperatorType reshape_lhs;
1161         ReshapeRHSOperatorType reshape_rhs;
1162         GEMMFunctionType       gemm;
1163         reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
1164         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1165         gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));
1166 
1167         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1168         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1169 
1170         add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &dst });
1171 
1172         // Allocate tensors
1173         lhs.allocator()->allocate();
1174         rhs.allocator()->allocate();
1175         lhs_reshaped.allocator()->allocate();
1176         rhs_reshaped.allocator()->allocate();
1177         dst.allocator()->allocate();
1178 
1179         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1180         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1181         ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
1182         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1183         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1184 
1185         // Fill tensors
1186         fill(AccessorType(lhs), 0);
1187         fill(AccessorType(rhs), 1);
1188 
1189         // Compute GEMM
1190         ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
1191         reshape_lhs.run(reshape_lhs_pack);
1192         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1193         reshape_rhs.run(reshape_rhs_pack);
1194         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
1195         gemm.run(gemm_pack);
1196 
1197         return dst;
1198     }
1199 
1200     SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, unsigned int m_h, DataType data_type)
1201     {
1202         TensorShape dst_shape = lhs_shape;
1203         dst_shape.set(0, rhs_shape[0]);
1204         dst_shape.set(1, lhs_shape[1] / m_h);
1205         dst_shape.set(2, m_h);
1206         dst_shape.set(3, lhs_shape[2]);
1207 
1208         switch(data_type)
1209         {
1210             case DataType::QASYMM8:
1211             {
1212                 // Create reference
1213                 SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
1214                 SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
1215 
1216                 // Fill reference
1217                 fill(lhs, 0);
1218                 fill(rhs, 1);
1219 
1220                 return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
1221             }
1222             case DataType::QASYMM8_SIGNED:
1223             {
1224                 // Create reference
1225                 SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
1226                 SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };
1227 
1228                 // Fill reference
1229                 fill(lhs, 0);
1230                 fill(rhs, 1);
1231 
1232                 return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
1233             }
1234             default:
1235                 ARM_COMPUTE_ERROR("Unsupported data type");
1236         }
1237     }
1238 
1239     TensorType            _target{};
1240     SimpleTensor<int32_t> _reference{};
1241 };
1242 
1243 template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
1244 class GEMMLowpMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
1245 {
1246 public:
1247     template <typename...>
1248     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
1249                unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type)
1250     {
1251         GEMMLHSMatrixInfo lhs_info;
1252         lhs_info.m0 = m0;
1253         lhs_info.k0 = k0;
1254 
1255         GEMMRHSMatrixInfo rhs_info;
1256         rhs_info.n0         = n0;
1257         rhs_info.k0         = k0;
1258         rhs_info.h0         = h0;
1259         rhs_info.interleave = interleave_rhs;
1260         rhs_info.transpose  = transpose_rhs;
1261 
1262         // Set the tensor shapes for LHS and RHS matrices
1263         const TensorShape lhs_shape(k, m, batch_size);
1264         const TensorShape rhs_shape(n, k, batch_size);
1265 
1266         _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type);
1267         _reference = compute_reference(lhs_shape, rhs_shape, data_type);
1268     }
1269 
1270 protected:
1271     template <typename U>
1272     void fill(U &&tensor, int i)
1273     {
1274         switch(tensor.data_type())
1275         {
1276             case DataType::QASYMM8:
1277             {
1278                 // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
1279                 std::uniform_int_distribution<> distribution(1, 254);
1280                 library->fill(tensor, distribution, i);
1281             }
1282             break;
1283             case DataType::QASYMM8_SIGNED:
1284             {
1285                 std::uniform_int_distribution<> distribution(-127, 126);
1286                 library->fill(tensor, distribution, i);
1287             }
1288             break;
1289             default:
1290                 ARM_COMPUTE_ERROR("Unsupported data type");
1291         }
1292     }
1293 
1294     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info,
1295                               const GEMMRHSMatrixInfo &rhs_info, DataType data_type)
1296     {
1297         // Create tensors
1298         TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1299         TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1300         TensorType rhs_reshaped;
1301         TensorType dst;
1302 
1303         const unsigned int M = lhs_shape[1];
1304         const unsigned int N = rhs_shape[0];
1305         const unsigned int K = lhs_shape[0];
1306 
1307         GEMMKernelInfo gemm_info;
1308         gemm_info.m        = M;
1309         gemm_info.n        = N;
1310         gemm_info.k        = K;
1311         gemm_info.lhs_info = lhs_info;
1312         gemm_info.rhs_info = rhs_info;
1313         // The output tensor will be auto-initialized within the function
1314 
1315         // Create and configure function
1316         ReshapeRHSOperatorType reshape_rhs;
1317         GEMMFunctionType       gemm;
1318         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1319         gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info);
1320 
1321         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1322         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1323 
1324         add_padding_x({ &lhs, &rhs, &rhs_reshaped, &dst });
1325 
1326         // Allocate tensors
1327         lhs.allocator()->allocate();
1328         rhs.allocator()->allocate();
1329         rhs_reshaped.allocator()->allocate();
1330         dst.allocator()->allocate();
1331 
1332         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1333         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1334         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1335         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1336 
1337         // Fill tensors
1338         fill(AccessorType(lhs), 0);
1339         fill(AccessorType(rhs), 1);
1340 
1341         // Compute GEMM
1342         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1343         reshape_rhs.run(reshape_rhs_pack);
1344         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
1345         gemm.run(gemm_pack);
1346 
1347         return dst;
1348     }
1349 
1350     SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type)
1351     {
1352         TensorShape dst_shape = lhs_shape;
1353         dst_shape[0]          = rhs_shape[0];
1354         dst_shape[1]          = lhs_shape[1];
1355 
1356         if(data_type == DataType::QASYMM8)
1357         {
1358             // Create reference
1359             SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
1360             SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
1361 
1362             // Fill reference
1363             fill(lhs, 0);
1364             fill(rhs, 1);
1365 
1366             return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
1367         }
1368         else
1369         {
1370             // Create reference
1371             SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
1372             SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };
1373 
1374             // Fill reference
1375             fill(lhs, 0);
1376             fill(rhs, 1);
1377 
1378             return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
1379         }
1380     }
1381 
1382     TensorType            _target{};
1383     SimpleTensor<int32_t> _reference{};
1384 };
1385 
1386 template <typename T, typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType, typename ReduceOperation, typename CastOperation>
1387 class GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture : public framework::Fixture
1388 {
1389 public:
1390     template <typename...>
1391     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
1392                unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, bool broadcast_bias, DataType data_type)
1393     {
1394         GEMMLowpOutputStageInfo output_stage;
1395         output_stage.type                    = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
1396         output_stage.output_data_type        = data_type;
1397         output_stage.gemmlowp_multipliers    = std::vector<int32_t> { 1 };
1398         output_stage.gemmlowp_shifts         = std::vector<int32_t> { 1 };
1399         output_stage.gemmlowp_multipliers[0] = 1;
1400         output_stage.gemmlowp_shifts[0]      = 1;
1401         output_stage.gemmlowp_offset         = 0;
1402         constexpr float scale                = 0.001f;
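             // calculate_quantized_multiplier decomposes the float scale into the integer
             // multiplier/shift pair consumed by the QUANTIZE_DOWN_FIXEDPOINT output stage
             // (the scale is approximated by a normalised 32-bit multiplier and a power-of-two shift).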
1403         quantization::calculate_quantized_multiplier(scale, &output_stage.gemmlowp_multipliers[0], &output_stage.gemmlowp_shifts[0]);
1404         output_stage.gemmlowp_min_bound = -100;
1405         output_stage.gemmlowp_max_bound = 100;
1406 
1407         GEMMLHSMatrixInfo lhs_info;
1408         lhs_info.m0 = m0;
1409         lhs_info.k0 = k0;
1410 
1411         GEMMRHSMatrixInfo rhs_info;
1412         rhs_info.n0         = n0;
1413         rhs_info.k0         = k0;
1414         rhs_info.h0         = h0;
1415         rhs_info.interleave = interleave_rhs;
1416         rhs_info.transpose  = transpose_rhs;
1417 
1418         int a_offset = 1;
1419         int b_offset = 1;
1420 
1421         // Set the tensor shapes for LHS and RHS matrices
1422         const TensorShape lhs_shape(k, m, batch_size);
1423         const TensorShape rhs_shape(n, k, batch_size);
1424         const TensorShape bias_shape(n,
1425                                      broadcast_bias ? 1 : m,
1426                                      broadcast_bias ? 1 : batch_size);
1427 
1428         _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, output_stage, a_offset, b_offset);
1429         if(gemm_validated == true)
1430         {
1431             _reference = compute_reference(lhs_shape, rhs_shape, bias_shape, data_type, output_stage, a_offset, b_offset);
1432         }
1433     }
1434 
1435 protected:
1436     template <typename U>
1437     void fill(U &&tensor, int i)
1438     {
1439         switch(tensor.data_type())
1440         {
1441             case DataType::QASYMM8:
1442             {
1443                 // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
1444                 std::uniform_int_distribution<> distribution(1, 254);
1445                 library->fill(tensor, distribution, i);
1446             }
1447             break;
1448             case DataType::QASYMM8_SIGNED:
1449             {
1450                 std::uniform_int_distribution<> distribution(-127, 126);
1451                 library->fill(tensor, distribution, i);
1452             }
1453             break;
1454             case DataType::S32:
1455             {
1456                 std::uniform_int_distribution<> distribution(-10000, 10000);
1457                 library->fill(tensor, distribution, i);
1458             }
1459             break;
1460             default:
1461                 ARM_COMPUTE_ERROR("Unsupported data type");
1462         }
1463     }
1464 
1465     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info,
1466                               const GEMMRHSMatrixInfo &rhs_info, DataType data_type, GEMMLowpOutputStageInfo output_stage, const int a_offset, const int b_offset)
1467     {
1468         // Create tensors
1469         TensorType lhs  = create_tensor<TensorType>(lhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, a_offset));
1470         TensorType rhs  = create_tensor<TensorType>(rhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, b_offset));
1471         TensorType bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1);
1472         TensorType dst;
1473         TensorType rhs_reshaped;
1474 
1475         const unsigned int M = lhs_shape[1];
1476         const unsigned int N = rhs_shape[0];
1477         const unsigned int K = lhs_shape[0];
1478 
1479         // Tensors for precomputing sum of lhs rows / rhs columns
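             // These sums feed the offset-contribution terms of the quantized GEMM: expanding
             // sum_k (lhs - a_offset) * (rhs - b_offset) gives -b_offset * sum_k(lhs) per row,
             // -a_offset * sum_k(rhs) per column and a constant K * a_offset * b_offset.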
1480         TensorType vec_sum_rows = create_tensor<TensorType>(TensorShape(M, 1, lhs_shape[2]), DataType::S32, 1);
1481         TensorType vec_sum_cols = create_tensor<TensorType>(TensorShape(N, 1, rhs_shape[2]), DataType::S32, 1);
1482 
1483         GEMMKernelInfo gemm_info;
1484         gemm_info.m            = M;
1485         gemm_info.n            = N;
1486         gemm_info.k            = K;
1487         gemm_info.lhs_info     = lhs_info;
1488         gemm_info.rhs_info     = rhs_info;
1489         gemm_info.output_stage = output_stage;
1490         gemm_info.a_offset     = a_offset;
1491         gemm_info.b_offset     = b_offset;
1492         // The output tensor will be auto-initialized within the function
1493 
1494         // Create and configure function
1495         ReshapeRHSOperatorType reshape_rhs;
1496         GEMMFunctionType       gemm;
1497         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1498 
1499         // If validation fails, do not try to run: validate() checks whether the target
1500         // technology supports this extension and, if it does not, the test is skipped.
1501         // Running the function on an unsupported target would make the test fail anyway,
1502         // because target and reference would not match.
1503         gemm_validated = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, vec_sum_cols.info(), vec_sum_rows.info(), bias.info()));
1504         if(gemm_validated == true)
1505         {
1506             gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, vec_sum_cols.info(), vec_sum_rows.info(), bias.info());
1507 
1508             ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1509             ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1510             ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
1511 
1512             // Allocate tensors
1513             lhs.allocator()->allocate();
1514             rhs.allocator()->allocate();
1515             rhs_reshaped.allocator()->allocate();
1516             bias.allocator()->allocate();
1517             vec_sum_cols.allocator()->allocate();
1518             vec_sum_rows.allocator()->allocate();
1519             dst.allocator()->allocate();
1520 
1521             ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1522             ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1523             ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1524             ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
1525             ARM_COMPUTE_ASSERT(!vec_sum_cols.info()->is_resizable());
1526             ARM_COMPUTE_ASSERT(!vec_sum_rows.info()->is_resizable());
1527             ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1528 
1529             // Fill tensors
1530             fill(AccessorType(lhs), 0);
1531             fill(AccessorType(rhs), 1);
1532             fill(AccessorType(bias), 2);
1533 
1534             TensorType    lhs_32 = create_tensor<TensorType>(lhs_shape, DataType::S32, 1);
1535             TensorType    rhs_32 = create_tensor<TensorType>(rhs_shape, DataType::S32, 1);
1536             CastOperation cast_lhs;
1537             CastOperation cast_rhs;
1538             cast_lhs.configure(&lhs, &lhs_32, ConvertPolicy::SATURATE);
1539             cast_rhs.configure(&rhs, &rhs_32, ConvertPolicy::SATURATE);
1540             lhs_32.allocator()->allocate();
1541             rhs_32.allocator()->allocate();
1542             cast_lhs.run();
1543             cast_rhs.run();
1544 
1545             ReduceOperation lhs_sum_rows;
1546             ReduceOperation rhs_sum_cols;
1547 
1548             lhs_sum_rows.configure(&lhs_32, &vec_sum_rows, 0, ReductionOperation::SUM, false);
1549             rhs_sum_cols.configure(&rhs_32, &vec_sum_cols, 1, ReductionOperation::SUM);
1550 
1551             lhs_sum_rows.run();
1552             rhs_sum_cols.run();
1553 
1554             // Compute GEMM
1555             ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1556             reshape_rhs.run(reshape_rhs_pack);
1557             ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst }, { ACL_VEC_COL_SUM, &vec_sum_cols }, { ACL_VEC_ROW_SUM, &vec_sum_rows } });
1558             gemm.run(gemm_pack);
1559         }
1560 
1561         return dst;
1562     }
1563 
1564     SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, GEMMLowpOutputStageInfo output_stage,
1565                                       const int a_offset, const int b_offset)
1566     {
1567         TensorShape dst_shape = lhs_shape;
1568         dst_shape[0]          = rhs_shape[0];
1569         dst_shape[1]          = lhs_shape[1];
1570 
1571         // Create reference
1572         SimpleTensor<T>       lhs{ lhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, a_offset) };
1573         SimpleTensor<T>       rhs{ rhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, b_offset) };
1574         SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
1575         SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 };
1576         SimpleTensor<T>       dst_final{ dst_shape, data_type, 1 };
1577 
1578         // Fill reference
1579         fill(lhs, 0);
1580         fill(rhs, 1);
1581         fill(bias, 2);
1582 
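             // Reference pipeline: raw int32 GEMM including the a/b offset contributions, then the
             // fixed-point down-scale stage which adds the bias, applies the multiplier/shift and the
             // output offset, and clamps to [gemmlowp_min_bound, gemmlowp_max_bound].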
1583         dst       = reference::gemmlowp_matrix_multiply_core<int32_t, T>(lhs, rhs, dst_shape, a_offset, b_offset);
1584         dst_final = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, T>(dst, bias,
1585                                                                                       output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
1586         return dst_final;
1587     }
1588 
1589     bool            gemm_validated = true;
1590     TensorType      _target{};
1591     SimpleTensor<T> _reference{};
1592 };
1593 
1594 template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
1595 class GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULValidationFixture : public framework::Fixture
1596 {
1597 public:
1598     template <typename...>
1599     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
1600                unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type)
1601     {
1602         GEMMLHSMatrixInfo lhs_info;
1603         lhs_info.m0 = m0;
1604         lhs_info.k0 = k0;
1605 
1606         GEMMRHSMatrixInfo rhs_info;
1607         rhs_info.n0         = n0;
1608         rhs_info.k0         = k0;
1609         rhs_info.h0         = h0;
1610         rhs_info.interleave = interleave_rhs;
1611         rhs_info.transpose  = transpose_rhs;
1612 
1613         // Set the tensor shapes for LHS and RHS matrices
1614         const TensorShape lhs_shape(k, m, batch_size);
1615         const TensorShape rhs_shape(n, k, batch_size);
1616 
1617         _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type);
1618         if(gemm_validated == true)
1619         {
1620             _reference = compute_reference(lhs_shape, rhs_shape, data_type);
1621         }
1622     }
1623 
1624 protected:
1625     template <typename U>
1626     void fill(U &&tensor, int i)
1627     {
1628         switch(tensor.data_type())
1629         {
1630             case DataType::QASYMM8:
1631             {
1632                 // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
1633                 std::uniform_int_distribution<> distribution(1, 254);
1634                 library->fill(tensor, distribution, i);
1635             }
1636             break;
1637             case DataType::QASYMM8_SIGNED:
1638             {
1639                 std::uniform_int_distribution<> distribution(-127, 126);
1640                 library->fill(tensor, distribution, i);
1641             }
1642             break;
1643             default:
1644                 ARM_COMPUTE_ERROR("Unsupported data type");
1645         }
1646     }
1647 
1648     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info,
1649                               const GEMMRHSMatrixInfo &rhs_info, DataType data_type)
1650     {
1651         // Create tensors
1652         TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1653         TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1654         TensorType rhs_reshaped;
1655         TensorType dst;
1656 
1657         const unsigned int M = lhs_shape[1];
1658         const unsigned int N = rhs_shape[0];
1659         const unsigned int K = lhs_shape[0];
1660 
1661         GEMMKernelInfo gemm_info;
1662         gemm_info.m        = M;
1663         gemm_info.n        = N;
1664         gemm_info.k        = K;
1665         gemm_info.lhs_info = lhs_info;
1666         gemm_info.rhs_info = rhs_info;
1667         // The output tensor will be auto-initialized within the function
1668 
1669         // Create and configure function
1670         ReshapeRHSOperatorType reshape_rhs;
1671         GEMMFunctionType       gemm;
1672         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1673 
1674         // If validation fails, do not try to run: validate() checks whether the target
1675         // technology supports this extension and, if it does not, the test is skipped.
1676         // Running the function on an unsupported target would make the test fail anyway,
1677         // because target and reference would not match.
1678         gemm_validated = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, nullptr, nullptr, nullptr));
1679         if(gemm_validated == true)
1680         {
1681             gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, nullptr, nullptr, nullptr);
1682 
1683             ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1684             ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1685 
1686             // Allocate tensors
1687             lhs.allocator()->allocate();
1688             rhs.allocator()->allocate();
1689             rhs_reshaped.allocator()->allocate();
1690             dst.allocator()->allocate();
1691 
1692             ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1693             ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1694             ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1695             ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1696 
1697             // Fill tensors
1698             fill(AccessorType(lhs), 0);
1699             fill(AccessorType(rhs), 1);
1700 
1701             // Compute GEMM
1702             ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1703             reshape_rhs.run(reshape_rhs_pack);
1704             ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
1705             gemm.run(gemm_pack);
1706         }
1707 
1708         return dst;
1709     }
1710 
1711     SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type)
1712     {
1713         TensorShape dst_shape = lhs_shape;
1714         dst_shape[0]          = rhs_shape[0];
1715         dst_shape[1]          = lhs_shape[1];
1716 
1717         if(data_type == DataType::QASYMM8)
1718         {
1719             // Create reference
1720             SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
1721             SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
1722             SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 };
1723 
1724             // Fill reference
1725             fill(lhs, 0);
1726             fill(rhs, 1);
1727 
1728             return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
1729         }
1730         else
1731         {
1732             // Create reference
1733             SimpleTensor<int8_t>  lhs{ lhs_shape, data_type, 1 };
1734             SimpleTensor<int8_t>  rhs{ rhs_shape, data_type, 1 };
1735             SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 };
1736 
1737             // Fill reference
1738             fill(lhs, 0);
1739             fill(rhs, 1);
1740 
1741             return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
1742         }
1743     }
1744 
1745     bool                  gemm_validated = true;
1746     TensorType            _target{};
1747     SimpleTensor<int32_t> _reference{};
1748 };
1749 
1750 template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
1751 class GEMMLowpMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
1752 {
1753 public:
1754     template <typename...>
1755     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
1756                unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type)
1757     {
1758         GEMMLHSMatrixInfo lhs_info;
1759         lhs_info.m0 = m0;
1760         lhs_info.k0 = k0;
1761 
1762         GEMMRHSMatrixInfo rhs_info;
1763         rhs_info.n0         = n0;
1764         rhs_info.k0         = k0;
1765         rhs_info.h0         = h0;
1766         rhs_info.interleave = interleave_rhs;
1767         rhs_info.transpose  = transpose_rhs;
1768 
1769         // In case of GEMM3D, m is the product between m_w and m_h
1770         const unsigned int m = m_w * m_h;
1771 
1772         // Set the tensor shapes for LHS and RHS matrices
1773         const TensorShape lhs_shape(k, m, batch_size);
1774         const TensorShape rhs_shape(n, k, batch_size);
1775 
1776         _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, m_h, data_type);
1777         _reference = compute_reference(lhs_shape, rhs_shape, m_h, data_type);
1778     }
1779 
1780 protected:
1781     template <typename U>
1782     void fill(U &&tensor, int i)
1783     {
1784         switch(tensor.data_type())
1785         {
1786             case DataType::QASYMM8:
1787             {
1788                 // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
1789                 std::uniform_int_distribution<> distribution(1, 254);
1790                 library->fill(tensor, distribution, i);
1791             }
1792             break;
1793             case DataType::QASYMM8_SIGNED:
1794             {
1795                 std::uniform_int_distribution<> distribution(-127, 126);
1796                 library->fill(tensor, distribution, i);
1797             }
1798             break;
1799             default:
1800                 ARM_COMPUTE_ERROR("Unsupported data type");
1801         }
1802     }
1803 
1804     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info,
1805                               const GEMMRHSMatrixInfo &rhs_info, unsigned int m_h, DataType data_type)
1806     {
1807         // Create tensors
1808         TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1809         TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1810         TensorType rhs_reshaped;
1811         TensorType dst;
1812 
1813         const unsigned int M = lhs_shape[1];
1814         const unsigned int N = rhs_shape[0];
1815         const unsigned int K = lhs_shape[0];
1816 
1817         GEMMKernelInfo gemm_info;
1818         gemm_info.m                   = M;
1819         gemm_info.n                   = N;
1820         gemm_info.k                   = K;
1821         gemm_info.depth_output_gemm3d = m_h;
1822         gemm_info.lhs_info            = lhs_info;
1823         gemm_info.rhs_info            = rhs_info;
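             // depth_output_gemm3d = m_h makes the kernel write the 2D result as a 3D output of
             // depth m_h, matching the 4D shape built in compute_reference below.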
1824         // The output tensor will be auto-initialized within the function
1825 
1826         // Create and configure function
1827         ReshapeRHSOperatorType reshape_rhs;
1828         GEMMFunctionType       gemm;
1829         reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1830         gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info);
1831 
1832         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1833         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1834 
1835         add_padding_x({ &lhs, &rhs, &rhs_reshaped, &dst });
1836 
1837         // Allocate tensors
1838         lhs.allocator()->allocate();
1839         rhs.allocator()->allocate();
1840         rhs_reshaped.allocator()->allocate();
1841         dst.allocator()->allocate();
1842 
1843         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1844         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1845         ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1846         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1847 
1848         // Fill tensors
1849         fill(AccessorType(lhs), 0);
1850         fill(AccessorType(rhs), 1);
1851 
1852         // Compute GEMM
1853         ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1854         reshape_rhs.run(reshape_rhs_pack);
1855         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
1856         gemm.run(gemm_pack);
1857 
1858         return dst;
1859     }
1860 
1861     SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, unsigned int m_h, DataType data_type)
1862     {
1863         TensorShape dst_shape = lhs_shape;
1864         dst_shape.set(0, rhs_shape[0]);
1865         dst_shape.set(1, lhs_shape[1] / m_h);
1866         dst_shape.set(2, m_h);
1867         dst_shape.set(3, lhs_shape[2]);
1868 
1869         if(data_type == DataType::QASYMM8)
1870         {
1871             // Create reference
1872             SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
1873             SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
1874 
1875             // Fill reference
1876             fill(lhs, 0);
1877             fill(rhs, 1);
1878 
1879             return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
1880         }
1881         else
1882         {
1883             // Create reference
1884             SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
1885             SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };
1886 
1887             // Fill reference
1888             fill(lhs, 0);
1889             fill(rhs, 1);
1890 
1891             return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
1892         }
1893     }
1894 
1895     TensorType            _target{};
1896     SimpleTensor<int32_t> _reference{};
1897 };
1898 
1899 template <typename TensorType, typename AccessorType, typename GEMMFunctionType>
1900 class GEMMLowpMatrixMultiplyNativeValidationFixture : public framework::Fixture
1901 {
1902 public:
1903     template <typename...>
1904     void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0)
1905     {
1906         GEMMLHSMatrixInfo lhs_info;
1907         lhs_info.m0 = m0;
1908         lhs_info.k0 = k0;
1909 
1910         GEMMRHSMatrixInfo rhs_info;
1911         rhs_info.n0 = n0;
1912         rhs_info.k0 = k0;
1913 
1914         // Set the tensor shapes for LHS and RHS matrices
1915         const TensorShape lhs_shape(k, m, batch_size);
1916         const TensorShape rhs_shape(n, k, batch_size);
1917 
1918         _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info);
1919         _reference = compute_reference(lhs_shape, rhs_shape);
1920     }
1921 
1922 protected:
1923     template <typename U>
1924     void fill(U &&tensor, int i)
1925     {
1926         // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
1927         std::uniform_int_distribution<> distribution(1, 254);
1928         library->fill(tensor, distribution, i);
1929     }
1930 
1931     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info)
1932     {
1933         // Create tensors
1934         TensorType lhs = create_tensor<TensorType>(lhs_shape, DataType::QASYMM8, 1);
1935         TensorType rhs = create_tensor<TensorType>(rhs_shape, DataType::QASYMM8, 1);
1936         TensorType dst;
1937 
1938         const unsigned int M = lhs_shape[1];
1939         const unsigned int N = rhs_shape[0];
1940         const unsigned int K = lhs_shape[0];
1941 
1942         // The output tensor will be auto-initialized within the function
1943 
1944         // Create and configure function
1945         GEMMFunctionType gemm;
1946         gemm.configure(lhs.info(), rhs.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));
1947 
1948         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1949         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1950 
1951         add_padding_x({ &lhs, &rhs, &dst });
1952 
1953         // Allocate tensors
1954         lhs.allocator()->allocate();
1955         rhs.allocator()->allocate();
1956         dst.allocator()->allocate();
1957 
1958         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1959         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1960         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1961 
1962         // Fill tensors
1963         fill(AccessorType(lhs), 0);
1964         fill(AccessorType(rhs), 1);
1965 
1966         // Compute GEMM
1967         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_DST, &dst } });
1968         gemm.run(gemm_pack);
1969 
1970         return dst;
1971     }
1972 
1973     SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape)
1974     {
1975         TensorShape dst_shape = lhs_shape;
1976         dst_shape[0]          = rhs_shape[0];
1977         dst_shape[1]          = lhs_shape[1];
1978 
1979         // Create reference
1980         SimpleTensor<uint8_t> lhs{ lhs_shape, DataType::QASYMM8, 1 };
1981         SimpleTensor<uint8_t> rhs{ rhs_shape, DataType::QASYMM8, 1 };
1982 
1983         // Fill reference
1984         fill(lhs, 0);
1985         fill(rhs, 1);
1986 
1987         return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
1988     }
1989 
1990     TensorType            _target{};
1991     SimpleTensor<int32_t> _reference{};
1992 };
1993 
1994 template <typename TensorType, typename AccessorType, typename GEMMFunctionType>
1995 class GEMMLowpMatrixMultiplyNative3DValidationFixture : public framework::Fixture
1996 {
1997 public:
1998     template <typename...>
1999     void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0)
2000     {
2001         GEMMLHSMatrixInfo lhs_info;
2002         lhs_info.m0 = m0;
2003         lhs_info.k0 = k0;
2004 
2005         GEMMRHSMatrixInfo rhs_info;
2006         rhs_info.n0 = n0;
2007         rhs_info.k0 = k0;
2008 
2009         // In case of GEMM3D, m is the product between m_w and m_h
2010         const unsigned int m = m_w * m_h;
2011 
2012         // Set the tensor shapes for LHS and RHS matrices
2013         const TensorShape lhs_shape(k, m, batch_size);
2014         const TensorShape rhs_shape(n, k, batch_size);
2015 
2016         _target    = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, m_h);
2017         _reference = compute_reference(lhs_shape, rhs_shape, m_h);
2018     }
2019 
2020 protected:
2021     template <typename U>
2022     void fill(U &&tensor, int i)
2023     {
2024         // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
2025         std::uniform_int_distribution<> distribution(1, 254);
2026         library->fill(tensor, distribution, i);
2027     }
2028 
2029     TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, unsigned int m_h)
2030     {
2031         // Create tensors
2032         TensorType lhs = create_tensor<TensorType>(lhs_shape, DataType::QASYMM8, 1);
2033         TensorType rhs = create_tensor<TensorType>(rhs_shape, DataType::QASYMM8, 1);
2034         TensorType dst;
2035 
2036         const unsigned int M = lhs_shape[1];
2037         const unsigned int N = rhs_shape[0];
2038         const unsigned int K = lhs_shape[0];
2039 
2040         // The output tensor will be auto-initialized within the function
2041 
2042         // Create and configure function
2043         GEMMFunctionType gemm;
2044         gemm.configure(lhs.info(), rhs.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));
2045 
2046         ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
2047         ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
2048 
2049         add_padding_x({ &lhs, &rhs, &dst });
2050 
2051         // Allocate tensors
2052         lhs.allocator()->allocate();
2053         rhs.allocator()->allocate();
2054         dst.allocator()->allocate();
2055 
2056         ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
2057         ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
2058         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
2059 
2060         // Fill tensors
2061         fill(AccessorType(lhs), 0);
2062         fill(AccessorType(rhs), 1);
2063 
2064         // Compute GEMM
2065         ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_DST, &dst } });
2066         gemm.run(gemm_pack);
2067 
2068         return dst;
2069     }
2070 
2071     SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, unsigned int m_h)
2072     {
2073         TensorShape dst_shape = lhs_shape;
2074         dst_shape.set(0, rhs_shape[0]);
2075         dst_shape.set(1, lhs_shape[1] / m_h);
2076         dst_shape.set(2, m_h);
2077         dst_shape.set(3, lhs_shape[2]);
2078 
2079         // Create reference
2080         SimpleTensor<uint8_t> lhs{ lhs_shape, DataType::QASYMM8, 1 };
2081         SimpleTensor<uint8_t> rhs{ rhs_shape, DataType::QASYMM8, 1 };
2082 
2083         // Fill reference
2084         fill(lhs, 0);
2085         fill(rhs, 1);
2086 
2087         return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
2088     }
2089 
2090     TensorType            _target{};
2091     SimpleTensor<int32_t> _reference{};
2092 };
2093 } // namespace validation
2094 } // namespace test
2095 } // namespace arm_compute
2096 #endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */
2097