1 /*
2 * Copyright (c) 2017-2022 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #ifndef ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
25 #define ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
26
27 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
28 #include "tests/framework/Fixture.h"
29 #include "tests/validation/Validation.h"
30 #include "tests/validation/reference/GEMMLowp.h"
31
32 namespace arm_compute
33 {
34 namespace test
35 {
36 namespace validation
37 {
38 namespace
39 {
40 template <typename U>
41 void fill(U &&tensor, int i)
42 {
43 switch(tensor.data_type())
44 {
45 case DataType::QSYMM8_PER_CHANNEL:
46 {
47 int min_bound = 128;
48 int max_bound = -127;
49 for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++)
50 {
51 std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i);
52 if(bounds.first < min_bound)
53 {
54 min_bound = bounds.first;
55 }
56 if(bounds.second > max_bound)
57 {
58 max_bound = bounds.second;
59 }
60 }
61 std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound);
62 library->fill(tensor, distribution, i);
63 break;
64 }
65 case DataType::QASYMM8:
66 {
67 std::uniform_int_distribution<uint32_t> distribution(1, 254);
68 library->fill(tensor, distribution, i);
69 break;
70 }
71 case DataType::S32:
72 {
73 std::uniform_int_distribution<int32_t> distribution(-20000, 20000);
74 library->fill(tensor, distribution, i);
75 break;
76 }
77 case DataType::F16:
78 {
79 arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
80 library->fill(tensor, distribution, i);
81 break;
82 }
83 case DataType::F32:
84 {
85 std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
86 library->fill(tensor, distribution, i);
87 break;
88 }
89 default:
90 library->fill_tensor_uniform(tensor, i);
91 }
92 }
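// Illustrative note (not part of the original helper's documentation): fill() is called as
// fill(AccessorType(tensor), seed) and picks a distribution based on the tensor's data type,
// e.g. [1, 254] for QASYMM8 or [-20000, 20000] for S32, so that the same seed produces the
// same data for both the target and the reference tensors.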
93
94 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false>
95 TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
96 GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
97 QuantizationInfo b_qinfo = QuantizationInfo(), bool reshape_b_only_on_first_run = false)
98 {
99 // Create tensors
100 DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
101
102 TensorType a = create_tensor<TensorType>(shape_a, data_type_a, 1);
103 TensorType b = create_tensor<TensorType>(shape_b, data_type_b, 1); // Note: passing the output data type here causes a mismatch in the GEMM output before the output stage; to be investigated
104 TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1);
105
106 a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
107
108 if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
109 {
110 b.info()->set_quantization_info(b_qinfo);
111 }
112 else
113 {
114 b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
115 }
116 TensorType bias;
117 if(is_fused)
118 {
119 TensorShape bias_shape(shape_b[0]);
120 bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1);
121 }
122
123 // Create and configure function
124 // The GEMMInfo includes the depth values used when the input/output are reinterpreted as 3D
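// Rough mapping of the arguments below (based on the GEMMInfo constructor as used here, not authoritative):
// GEMMInfo(is_a_reshaped = false, is_b_reshaped = false, reshape_b_only_on_first_run,
//          depth_output_gemm3d = shape_output[2] when the output is reinterpreted as 3D (0 otherwise),
//          reinterpret_input_as_3d, retain_internal_weights = false, gemmlowp_output_stage = output_stage)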
125 FunctionType gemmlowp;
126 gemmlowp.configure(&a, &b, is_fused ? &bias : nullptr, &output, GEMMInfo(false, false, reshape_b_only_on_first_run, (reinterpret_output_as_3d ? shape_output[2] : 0), reinterpret_input_as_3d, false,
127 output_stage));
128
129 ARM_COMPUTE_ASSERT(a.info()->is_resizable());
130 ARM_COMPUTE_ASSERT(b.info()->is_resizable());
131 ARM_COMPUTE_ASSERT(output.info()->is_resizable());
132
133 add_padding_x({ &a, &b, &output });
134
135 // Allocate tensors
136 a.allocator()->allocate();
137 b.allocator()->allocate();
138 output.allocator()->allocate();
139
140 ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
141 ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
142 ARM_COMPUTE_ASSERT(!output.info()->is_resizable());
143
144 // Fill tensors
145 fill(AccessorType(a), 0);
146 fill(AccessorType(b), 1);
147
148 if(is_fused)
149 {
150 ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
151 bias.allocator()->allocate();
152 ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
153 fill(AccessorType(bias), 2);
154 }
155
156 // Run with variable inputs.
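// When run_twice is true the function is executed once, the inputs are refilled with different
// seeds, and it is executed again; only the second result is returned. This is intended to
// exercise repeated runs with changing data (e.g. together with reshape_b_only_on_first_run).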
157 if(run_twice)
158 {
159 gemmlowp.run();
160 fill(AccessorType(a), 3); // Fill tensors with new seed after run
161 fill(AccessorType(b), 4);
162 if(is_fused)
163 {
164 fill(AccessorType(bias), 5);
165 }
166 }
167
168 // Compute GEMM function
169 gemmlowp.run();
170 return output;
171 }
172
173 template <bool reinterpret_input_as_3d, typename TI = uint8_t, typename TW = uint8_t, bool pretranspose_A = false, bool pretranspose_B = false, bool run_twice = false>
174 SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
175 DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo())
176 {
177 TensorShape shape_a_to_use = shape_a;
178 if(reinterpret_input_as_3d)
179 {
180 // Collapse the second and third dimension if the input is 3D
181 shape_a_to_use.collapse(2U, 1U);
182 }
183
184 // Create reference
185 SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1 };
186 SimpleTensor<TW> b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
187
188 TensorShape shape_a_to_use_transposed{ shape_a_to_use };
189 TensorShape shape_b_transposed{ shape_b };
190
191 shape_a_to_use_transposed.set(0, shape_a_to_use[1]);
192 shape_a_to_use_transposed.set(1, shape_a_to_use[0]);
193 shape_b_transposed.set(0, shape_b[1]);
194 shape_b_transposed.set(1, shape_b[0]);
195
196 SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1 };
197 SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
198
199 // Fill reference
200 fill(a, 0);
201 fill(b, 1);
202
203 // Transpose reference if required
204 /* Note: Assuming the usual batch matmul dimensions A = (B x M x K) and B = (B x K x N): if pretranspose_A is set to true, A is assumed to arrive as (B x K x M),
205 i.e. A must be pre-transposed before being passed to the fixture, and the fixture transposes it back to (B x M x K)
206 so that the reference implementation, which works on (B x M x K) input, can be called.
207 Similarly, if pretranspose_B is set to true, B is assumed to arrive as (B x N x K) and must be pre-transposed before being passed to the fixture. */
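/* Illustrative shape example (TensorShape lists dimensions innermost-first, i.e. (x, y, z) = (cols, rows, batches)):
   with M = 3, K = 5, a pre-transposed A arrives as TensorShape(3, 5) = (B x K x M); transpose_matrix()
   below swaps the first two dimensions to give a_transposed = TensorShape(5, 3) = (B x M x K), which is the
   layout gemmlowp_matrix_multiply_core() expects. */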
208 if(pretranspose_A)
209 {
210 transpose_matrix<TI>(a, a_transposed);
211 }
212
213 if(pretranspose_B)
214 {
215 transpose_matrix<TW>(b, b_transposed);
216 }
217
218 // Run with variable inputs.
219 if(run_twice)
220 {
221 reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
222 fill((pretranspose_A) ? a_transposed : a, 3);
223 fill((pretranspose_B) ? b_transposed : b, 4);
224 }
225
226 return reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
227 }
228 }
229
230 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
231 class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture
232 {
233 public:
234 template <typename...>
235 void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
236 {
237 _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset);
238 _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset);
239 }
240
241 protected:
242 TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
243 {
244 return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_offset,
245 b_offset);
246 }
247
248 SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
249 {
250 return compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_offset, b_offset);
251 }
252
253 TensorType _target{};
254 SimpleTensor<int32_t> _reference{};
255 };
256
257 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t, bool run_twice = false>
258 class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture : public framework::Fixture
259 {
260 public:
261 template <typename...>
262 void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b,
263 bool reshape_b_only_on_first_run)
264 {
265 ARM_COMPUTE_ASSERT(output_stage.type != GEMMLowpOutputStageType::NONE);
266 DataType data_type_a = data_type_b == DataType::QASYMM8_SIGNED ? DataType::QASYMM8_SIGNED : DataType::QASYMM8;
267
268 if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
269 {
270 output_stage.is_quantized_per_channel = true;
271 const size_t num_channels = shape_b[0];
272 std::vector<float> scales(num_channels);
273 std::uniform_real_distribution<float> distribution(0.f, 1.f);
274 library->fill(scales, distribution, 0);
275 output_stage.gemmlowp_multipliers.resize(num_channels);
276 output_stage.gemmlowp_shifts.resize(num_channels);
277 for(size_t i = 0; i < num_channels; ++i)
278 {
279 quantization::calculate_quantized_multiplier(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]);
280 }
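// Each per-channel float scale is decomposed by calculate_quantized_multiplier() into an integer
// multiplier / shift pair for the fixed-point output stage (roughly scale ~= multiplier * 2^-31,
// adjusted by the shift); the exact semantics are defined in AsymmHelpers.h.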
281
282 _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales));
283 _target = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales), reshape_b_only_on_first_run);
284 }
285 else
286 {
287 _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo());
288 _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo(), reshape_b_only_on_first_run);
289 }
290 }
291
292 protected:
293 TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage,
294 DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo, bool reshape_b_only_on_first_run = false)
295 {
296 return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true, run_twice>(shape_a, shape_b, shape_output, a_offset,
297 b_offset,
298 output_stage, data_type_a, data_type_b, b_qinfo, reshape_b_only_on_first_run);
299 }
300
301 SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
302 GEMMLowpOutputStageInfo output_stage, DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo)
303 {
304 SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TI, TW, false, false, run_twice>(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_a, data_type_b,
305 b_qinfo);
306
307 TensorShape bias_shape(shape_b[0]);
308 SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
309 (run_twice) ? fill(bias, 5) : fill(bias, 2); // Fill bias with same seed as last run of gemmlowp_target
310
311 switch(output_stage.type)
312 {
313 case GEMMLowpOutputStageType::QUANTIZE_DOWN:
314 return reference::gemmlowp_quantize_down_scale<int32_t, TW>(output, bias,
315 output_stage.gemmlowp_offset, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
316 break;
317 case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT:
318 return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, TW>(output, bias,
319 output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
320 break;
321 default:
322 ARM_COMPUTE_ERROR("Not Supported!");
323 }
324 }
325
326 TensorType _target{};
327 SimpleTensor<TI> _reference{};
328 };
329
330 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, typename TI = uint8_t, typename TW = uint8_t>
331 class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public
332 GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW>
333 {
334 public:
335 template <typename...>
336 void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b)
337 {
338 GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW>::setup(shape_a, shape_b,
339 shape_output, a_offset, b_offset, output_stage, data_type_b, false);
340 }
341 };
342
343 template <typename TensorType, typename AccessorType, typename FunctionType>
344 class GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture : public framework::Fixture
345 {
346 public:
347 template <typename...>
348 void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
349 {
350 _target = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
351 _reference = compute_reference(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
352 }
353
354 protected:
355 template <typename U>
356 void fill(U &&tensor, int i)
357 {
358 std::uniform_int_distribution<> distribution(-6000, 6000);
359 library->fill(tensor, distribution, i);
360 }
361
362 TensorType compute_target(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
363 {
364 TensorShape shape_bias(shape[0]);
365
366 // Create tensors
367 TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
368 TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
369 TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8, 1);
370
371 // Create and configure function
372 FunctionType output_stage;
373 GEMMLowpOutputStageInfo output_stage_info = GEMMLowpOutputStageInfo();
374 output_stage_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN;
375 output_stage_info.gemmlowp_offset = result_offset;
376 output_stage_info.gemmlowp_multiplier = result_mult_int;
377 output_stage_info.gemmlowp_shift = result_shift;
378 output_stage_info.gemmlowp_min_bound = min;
379 output_stage_info.gemmlowp_max_bound = max;
380 output_stage_info.output_data_type = DataType::QASYMM8;
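// QUANTIZE_DOWN performs an integer re-quantization, roughly:
// out = clamp(((in + bias + gemmlowp_offset) * gemmlowp_multiplier) >> gemmlowp_shift, min, max)
// (see the reference gemmlowp_quantize_down_scale() for the exact behaviour).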
381 output_stage.configure(&a, add_bias ? &b : nullptr, &c, output_stage_info);
382
383 ARM_COMPUTE_ASSERT(a.info()->is_resizable());
384 ARM_COMPUTE_ASSERT(c.info()->is_resizable());
385
386 // Allocate tensors
387 a.allocator()->allocate();
388 c.allocator()->allocate();
389
390 ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
391 ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
392
393 // Fill tensor
394 fill(AccessorType(a), 0);
395
396 if(add_bias)
397 {
398 ARM_COMPUTE_ASSERT(b.info()->is_resizable());
399
400 // Allocate bias tensor
401 b.allocator()->allocate();
402
403 ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
404
405 // Fill tensor
406 fill(AccessorType(b), 1);
407 }
408
409 // Compute GEMM function
410 output_stage.run();
411 return c;
412 }
413
414 SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
415 {
416 // Create reference
417 TensorShape shape_bias(shape[0]);
418
419 SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
420 SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
421
422 // Fill reference
423 fill(a, 0);
424
425 const std::vector<int32_t> result_mult_int_vec = { result_mult_int };
426 const std::vector<int32_t> result_shift_vec = { result_shift };
427
428 if(add_bias)
429 {
430 // Fill bias
431 fill(b, 1);
432
433 return reference::gemmlowp_quantize_down_scale<int32_t, uint8_t>(a, b, result_offset, result_mult_int_vec, result_shift_vec, min, max);
434 }
435 else
436 {
437 return reference::gemmlowp_quantize_down_scale<int32_t, uint8_t>(a, result_offset, result_mult_int_vec, result_shift_vec, min, max);
438 }
439 }
440
441 TensorType _target{};
442 SimpleTensor<uint8_t> _reference{};
443 };
444
445 template <typename TensorType, typename AccessorType, typename FunctionType>
446 class GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture : public framework::Fixture
447 {
448 public:
449 template <typename...>
450 void setup(TensorShape shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
451 {
452 _target = compute_target(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
453 _reference = compute_reference(shape, result_offset, result_mult_int, result_shift, min, max, add_bias);
454 }
455
456 protected:
457 template <typename U>
458 void fill(U &&tensor, int i)
459 {
460 std::uniform_int_distribution<> distribution(-6000, 6000);
461 library->fill(tensor, distribution, i);
462 }
463
464 TensorType compute_target(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
465 {
466 TensorShape shape_bias(shape[0]);
467
468 // Create tensors
469 TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
470 TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
471 TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8_SIGNED, 1);
472
473 // Create and configure function
474 FunctionType output_stage;
475 GEMMLowpOutputStageInfo output_stage_info = GEMMLowpOutputStageInfo();
476 output_stage_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN;
477 output_stage_info.gemmlowp_offset = result_offset;
478 output_stage_info.gemmlowp_multiplier = result_mult_int;
479 output_stage_info.gemmlowp_shift = result_shift;
480 output_stage_info.gemmlowp_min_bound = min;
481 output_stage_info.gemmlowp_max_bound = max;
482 output_stage_info.output_data_type = DataType::QASYMM8_SIGNED;
483 output_stage.configure(&a, add_bias ? &b : nullptr, &c, output_stage_info);
484
485 ARM_COMPUTE_ASSERT(a.info()->is_resizable());
486 ARM_COMPUTE_ASSERT(c.info()->is_resizable());
487
488 // Allocate tensors
489 a.allocator()->allocate();
490 c.allocator()->allocate();
491
492 ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
493 ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
494
495 // Fill tensor
496 fill(AccessorType(a), 0);
497
498 if(add_bias)
499 {
500 ARM_COMPUTE_ASSERT(b.info()->is_resizable());
501
502 // Allocate bias tensor
503 b.allocator()->allocate();
504
505 ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
506
507 // Fill tensor
508 fill(AccessorType(b), 1);
509 }
510
511 // Compute GEMM function
512 output_stage.run();
513 return c;
514 }
515
516 SimpleTensor<int8_t> compute_reference(const TensorShape &shape, int32_t result_offset, int32_t result_mult_int, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
517 {
518 // Create reference
519 TensorShape shape_bias(shape[0]);
520
521 SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
522 SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
523
524 // Fill reference
525 fill(a, 0);
526
527 const std::vector<int32_t> result_mult_int_vec = { result_mult_int };
528 const std::vector<int32_t> result_shift_vec = { result_shift };
529
530 if(add_bias)
531 {
532 // Fill bias
533 fill(b, 1);
534
535 return reference::gemmlowp_quantize_down_scale<int32_t, int8_t>(a, b, result_offset, result_mult_int_vec, result_shift_vec, min, max);
536 }
537 else
538 {
539 return reference::gemmlowp_quantize_down_scale<int32_t, int8_t>(a, result_offset, result_mult_int_vec, result_shift_vec, min, max);
540 }
541 }
542
543 TensorType _target{};
544 SimpleTensor<int8_t> _reference{};
545 };
546
547 template <typename TensorType, typename AccessorType, typename FunctionType>
548 class GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture : public framework::Fixture
549 {
550 public:
551 template <typename...>
552 void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
553 {
554 _target = compute_target(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
555 _reference = compute_reference(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
556 }
557
558 protected:
559 template <typename U>
560 void fill(U &&tensor, int i)
561 {
562 std::uniform_int_distribution<> distribution(-6000, 6000);
563 library->fill(tensor, distribution, i);
564 }
565
566 TensorType compute_target(const TensorShape &shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
567 {
568 TensorShape shape_bias(shape[0]);
569
570 // Create tensors
571 TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
572 TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
573 TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8_SIGNED, 1);
574
575 // Create and configure function
576 FunctionType output_stage;
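// QUANTIZE_DOWN_FIXEDPOINT re-quantization, roughly:
// out = clamp((fixed_point_mul(in + bias, result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift, min, max)
// (see the reference gemmlowp_quantize_down_scale_by_fixedpoint() for the exact behaviour).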
577 output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
578
579 ARM_COMPUTE_ASSERT(a.info()->is_resizable());
580 ARM_COMPUTE_ASSERT(c.info()->is_resizable());
581
582 // Allocate tensors
583 a.allocator()->allocate();
584 c.allocator()->allocate();
585
586 ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
587 ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
588
589 // Fill tensor
590 fill(AccessorType(a), 0);
591
592 if(add_bias)
593 {
594 ARM_COMPUTE_ASSERT(b.info()->is_resizable());
595
596 // Allocate bias tensor
597 b.allocator()->allocate();
598
599 ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
600
601 // Fill tensor
602 fill(AccessorType(b), 1);
603 }
604
605 // Compute GEMM function
606 output_stage.run();
607 return c;
608 }
609
610 SimpleTensor<int8_t> compute_reference(const TensorShape &shape, int32_t result_fixed_point_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max,
611 bool add_bias)
612 {
613 // Create reference
614 TensorShape shape_bias(shape[0]);
615
616 SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
617 SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
618
619 // Fill reference
620 fill(a, 0);
621
622 const std::vector<int32_t> result_fixed_point_multiplier_vec = { result_fixed_point_multiplier };
623 const std::vector<int32_t> result_shift_vec = { result_shift };
624
625 if(add_bias)
626 {
627 // Fill bias
628 fill(b, 1);
629
630 return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, int8_t>(a, b, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
631 }
632 else
633 {
634 return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, int8_t>(a, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
635 }
636 }
637
638 TensorType _target{};
639 SimpleTensor<int8_t> _reference{};
640 };
641
642 template <typename TensorType, typename AccessorType, typename FunctionType>
643 class GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture : public framework::Fixture
644 {
645 public:
646 template <typename...>
647 void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
648 {
649 _target = compute_target(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
650 _reference = compute_reference(shape, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max, add_bias);
651 }
652
653 protected:
654 template <typename U>
655 void fill(U &&tensor, int i)
656 {
657 std::uniform_int_distribution<> distribution(-6000, 6000);
658 library->fill(tensor, distribution, i);
659 }
660
661 TensorType compute_target(const TensorShape &shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max, bool add_bias)
662 {
663 TensorShape shape_bias(shape[0]);
664
665 // Create tensors
666 TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
667 TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
668 TensorType c = create_tensor<TensorType>(shape, DataType::QASYMM8, 1);
669
670 // Create and configure function
671 FunctionType output_stage;
672 output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, result_offset_after_shift, min, max);
673
674 ARM_COMPUTE_ASSERT(a.info()->is_resizable());
675 ARM_COMPUTE_ASSERT(c.info()->is_resizable());
676
677 // Allocate tensors
678 a.allocator()->allocate();
679 c.allocator()->allocate();
680
681 ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
682 ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
683
684 // Fill tensor
685 fill(AccessorType(a), 0);
686
687 if(add_bias)
688 {
689 ARM_COMPUTE_ASSERT(b.info()->is_resizable());
690
691 // Allocate bias tensor
692 b.allocator()->allocate();
693
694 ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
695
696 // Fill tensor
697 fill(AccessorType(b), 1);
698 }
699
700 // Compute GEMM function
701 output_stage.run();
702 return c;
703 }
704
705 SimpleTensor<uint8_t> compute_reference(const TensorShape &shape, int32_t result_fixed_point_multiplier, int32_t result_shift, int32_t result_offset_after_shift, int32_t min, int32_t max,
706 bool add_bias)
707 {
708 // Create reference
709 TensorShape shape_bias(shape[0]);
710
711 SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
712 SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
713
714 // Fill reference
715 fill(a, 0);
716
717 const std::vector<int32_t> result_fixed_point_multiplier_vec = { result_fixed_point_multiplier };
718 const std::vector<int32_t> result_shift_vec = { result_shift };
719
720 if(add_bias)
721 {
722 // Fill bias
723 fill(b, 1);
724
725 return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, uint8_t>(a, b, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
726 }
727 else
728 {
729 return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, uint8_t>(a, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max);
730 }
731 }
732
733 TensorType _target{};
734 SimpleTensor<uint8_t> _reference{};
735 };
736
737 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
738 class GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture : public framework::Fixture
739 {
740 public:
741 template <typename...>
742 void setup(DataType data_type, TensorShape shape, float result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
743 {
744 _target = compute_target(data_type, shape, result_real_multiplier, result_offset, min, max, add_bias);
745 _reference = compute_reference(shape, result_real_multiplier, result_offset, min, max, add_bias);
746 }
747
748 protected:
749 template <typename U>
750 void fill(U &&tensor, int i)
751 {
752 // Keep the values small to avoid all of the data being clamped
753 std::uniform_int_distribution<> distribution(-500, 500);
754 library->fill(tensor, distribution, i);
755 }
756
757 TensorType compute_target(DataType data_type, const TensorShape &shape, float result_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
758 {
759 TensorShape shape_bias(shape[0]);
760
761 // Create tensors
762 TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
763 TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
764 TensorType c = create_tensor<TensorType>(shape, data_type, 1);
765
766 // create output stage info
767 GEMMLowpOutputStageInfo info;
768 info.gemmlowp_max_bound = max;
769 info.gemmlowp_min_bound = min;
770 info.gemmlowp_real_multiplier = result_multiplier;
771 info.gemmlowp_offset = result_offset;
772 info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FLOAT;
773 info.output_data_type = data_type;
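// QUANTIZE_DOWN_FLOAT re-quantization, roughly:
// out = clamp(round((in + bias) * gemmlowp_real_multiplier) + gemmlowp_offset, min, max)
// (see the reference gemmlowp_quantize_down_scale_by_float() for the exact behaviour).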
774
775 // Create and configure function
776 FunctionType output_stage;
777 output_stage.configure(&a, add_bias ? &b : nullptr, &c, info);
778
779 ARM_COMPUTE_ASSERT(a.info()->is_resizable());
780 ARM_COMPUTE_ASSERT(c.info()->is_resizable());
781
782 // Allocate tensors
783 a.allocator()->allocate();
784 c.allocator()->allocate();
785
786 ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
787 ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
788
789 // Fill tensor
790 fill(AccessorType(a), 0);
791
792 if(add_bias)
793 {
794 ARM_COMPUTE_ASSERT(b.info()->is_resizable());
795
796 // Allocate bias tensor
797 b.allocator()->allocate();
798
799 ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
800
801 // Fill tensor
802 fill(AccessorType(b), 1);
803 }
804
805 // Compute GEMM function
806 output_stage.run();
807 return c;
808 }
809
810 SimpleTensor<T> compute_reference(const TensorShape &shape, float_t result_real_multiplier, int32_t result_offset, int32_t min, int32_t max, bool add_bias)
811 {
812 // Create reference
813 TensorShape shape_bias(shape[0]);
814
815 SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
816 SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
817
818 // Fill reference
819 fill(a, 0);
820
821 const std::vector<float_t> result_float_multiplier_vec = { result_real_multiplier };
822
823 if(add_bias)
824 {
825 // Fill bias
826 fill(b, 1);
827
828 return reference::gemmlowp_quantize_down_scale_by_float<int32_t, T>(a, b, result_float_multiplier_vec, result_offset, min, max);
829 }
830 else
831 {
832 return reference::gemmlowp_quantize_down_scale_by_float<int32_t, T>(a, result_float_multiplier_vec, result_offset, min, max);
833 }
834 }
835
836 TensorType _target{};
837 SimpleTensor<T> _reference{};
838 };
839
840 template <typename TensorType, typename AccessorType, typename FunctionType>
841 class GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture : public framework::Fixture
842 {
843 public:
844 template <typename...>
845 void setup(TensorShape shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
846 {
847 _target = compute_target(shape, result_fixedpoint_multiplier, result_shift, min, max, add_bias);
848 _reference = compute_reference(shape, result_fixedpoint_multiplier, result_shift, min, max, add_bias);
849 }
850
851 protected:
852 template <typename U>
853 void fill(U &&tensor, int i)
854 {
855 std::uniform_int_distribution<> distribution(-6000, 6000);
856 library->fill(tensor, distribution, i);
857 }
858
859 TensorType compute_target(const TensorShape &shape, int32_t result_fixedpoint_multiplier, int32_t result_shift, int32_t min, int32_t max, bool add_bias)
860 {
861 TensorShape shape_bias(shape[0]);
862
863 // Create tensors
864 TensorType a = create_tensor<TensorType>(shape, DataType::S32, 1);
865 TensorType b = create_tensor<TensorType>(shape_bias, DataType::S32, 1);
866 TensorType c = create_tensor<TensorType>(shape, DataType::QSYMM16, 1);
867
868 // Create and configure function
869 FunctionType output_stage;
870 output_stage.configure(&a, add_bias ? &b : nullptr, &c, result_fixedpoint_multiplier, result_shift, min, max);
871
872 ARM_COMPUTE_ASSERT(a.info()->is_resizable());
873 ARM_COMPUTE_ASSERT(c.info()->is_resizable());
874
875 // Allocate tensors
876 a.allocator()->allocate();
877 c.allocator()->allocate();
878
879 ARM_COMPUTE_ASSERT(!a.info()->is_resizable());
880 ARM_COMPUTE_ASSERT(!c.info()->is_resizable());
881
882 // Fill tensor
883 fill(AccessorType(a), 0);
884
885 if(add_bias)
886 {
887 ARM_COMPUTE_ASSERT(b.info()->is_resizable());
888
889 // Allocate bias tensor
890 b.allocator()->allocate();
891
892 ARM_COMPUTE_ASSERT(!b.info()->is_resizable());
893
894 // Fill tensor
895 fill(AccessorType(b), 1);
896 }
897
898 // Compute GEMM function
899 output_stage.run();
900 return c;
901 }
902
903 SimpleTensor<int16_t> compute_reference(const TensorShape &shape, int32_t result_fixed_point_multiplier, int32_t result_shift, int32_t min, int32_t max,
904 bool add_bias)
905 {
906 // Create reference
907 TensorShape shape_bias(shape[0]);
908
909 SimpleTensor<int32_t> a{ shape, DataType::S32, 1 };
910 SimpleTensor<int32_t> b{ shape_bias, DataType::S32, 1 };
911
912 // Fill reference
913 fill(a, 0);
914
915 const std::vector<int32_t> result_fixed_point_multiplier_vec = { result_fixed_point_multiplier };
916 const std::vector<int32_t> result_shift_vec = { result_shift };
917
918 if(add_bias)
919 {
920 // Fill bias
921 fill(b, 1);
922
923 return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, int16_t>(a, b, result_fixed_point_multiplier_vec, result_shift_vec, 0, min, max);
924 }
925 else
926 {
927 return reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, int16_t>(a, result_fixed_point_multiplier_vec, result_shift_vec, 0, min, max);
928 }
929 }
930
931 TensorType _target{};
932 SimpleTensor<int16_t> _reference{};
933 };
934
935 template <typename TensorType, typename AccessorType, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
936 class GEMMLowpMatrixMultiplyReshapedValidationFixture : public framework::Fixture
937 {
938 public:
939 template <typename...>
940 void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0, bool interleave_lhs,
941 bool interleave_rhs, DataType data_type)
942 {
943 GEMMLHSMatrixInfo lhs_info;
944 lhs_info.m0 = m0;
945 lhs_info.k0 = k0;
946 lhs_info.v0 = v0;
947 lhs_info.interleave = interleave_lhs;
948 lhs_info.transpose = false;
949
950 GEMMRHSMatrixInfo rhs_info;
951 rhs_info.n0 = n0;
952 rhs_info.k0 = k0;
953 rhs_info.h0 = h0;
954 rhs_info.interleave = interleave_rhs;
955 rhs_info.transpose = true;
956
957 // Set the tensor shapes for LHS and RHS matrices
958 const TensorShape lhs_shape(k, m, batch_size);
959 const TensorShape rhs_shape(n, k, batch_size);
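// TensorShape lists dimensions innermost-first, so lhs_shape(k, m, batch_size) describes a batch of
// M x K matrices and rhs_shape(n, k, batch_size) a batch of K x N matrices.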
960
961 _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type);
962 _reference = compute_reference(lhs_shape, rhs_shape, data_type);
963 }
964
965 protected:
966 template <typename U>
967 void fill(U &&tensor, int i)
968 {
969 switch(tensor.data_type())
970 {
971 case DataType::QASYMM8:
972 {
973 // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
974 std::uniform_int_distribution<> distribution(1, 254);
975 library->fill(tensor, distribution, i);
976 }
977 break;
978 case DataType::QASYMM8_SIGNED:
979 {
980 std::uniform_int_distribution<> distribution(-127, 126);
981 library->fill(tensor, distribution, i);
982 }
983 break;
984 default:
985 ARM_COMPUTE_ERROR("Unsupported data type");
986 }
987 }
988
989 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, DataType data_type)
990 {
991 // Create tensors
992 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
993 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
994 TensorType lhs_reshaped;
995 TensorType rhs_reshaped;
996 TensorType dst;
997
998 const unsigned int M = lhs_shape[1];
999 const unsigned int N = rhs_shape[0];
1000 const unsigned int K = lhs_shape[0];
1001
1002 // The output tensor will be auto-initialized within the function
1003
1004 // Create and configure function
1005 ReshapeLHSOperatorType reshape_lhs;
1006 ReshapeRHSOperatorType reshape_rhs;
1007 GEMMFunctionType gemm;
1008 reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
1009 reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1010 gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));
1011
1012 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1013 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1014
1015 add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &dst });
1016
1017 // Allocate tensors
1018 lhs.allocator()->allocate();
1019 rhs.allocator()->allocate();
1020 lhs_reshaped.allocator()->allocate();
1021 rhs_reshaped.allocator()->allocate();
1022 dst.allocator()->allocate();
1023
1024 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1025 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1026 ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
1027 ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1028 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1029
1030 // Fill tensors
1031 fill(AccessorType(lhs), 0);
1032 fill(AccessorType(rhs), 1);
1033
1034 // Compute GEMM
1035 ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
1036 reshape_lhs.run(reshape_lhs_pack);
1037 ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1038 reshape_rhs.run(reshape_rhs_pack);
1039 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
1040 gemm.run(gemm_pack);
1041
1042 return dst;
1043 }
1044
1045 SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type)
1046 {
1047 TensorShape dst_shape = lhs_shape;
1048 dst_shape[0] = rhs_shape[0];
1049 dst_shape[1] = lhs_shape[1];
1050
1051 switch(data_type)
1052 {
1053 case DataType::QASYMM8:
1054 {
1055 // Create reference
1056 SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
1057 SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
1058
1059 // Fill reference
1060 fill(lhs, 0);
1061 fill(rhs, 1);
1062
1063 return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
1064 }
1065 case DataType::QASYMM8_SIGNED:
1066 {
1067 // Create reference
1068 SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
1069 SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };
1070
1071 // Fill reference
1072 fill(lhs, 0);
1073 fill(rhs, 1);
1074
1075 return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
1076 }
1077 default:
1078 ARM_COMPUTE_ERROR("Unsupported data type");
1079 }
1080 }
1081
1082 TensorType _target{};
1083 SimpleTensor<int32_t> _reference{};
1084 };
1085
1086 template <typename TensorType, typename AccessorType, typename ReshapeLHSOperatorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
1087 class GEMMLowpMatrixMultiplyReshaped3DValidationFixture : public framework::Fixture
1088 {
1089 public:
1090 template <typename...>
1091 void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0, unsigned int v0, unsigned int h0,
1092 bool interleave_lhs, bool interleave_rhs, DataType data_type)
1093 {
1094 GEMMLHSMatrixInfo lhs_info;
1095 lhs_info.m0 = m0;
1096 lhs_info.k0 = k0;
1097 lhs_info.v0 = v0;
1098 lhs_info.interleave = interleave_lhs;
1099 lhs_info.transpose = false;
1100
1101 GEMMRHSMatrixInfo rhs_info;
1102 rhs_info.n0 = n0;
1103 rhs_info.k0 = k0;
1104 rhs_info.h0 = h0;
1105 rhs_info.interleave = interleave_rhs;
1106 rhs_info.transpose = true;
1107
1108 // In case of GEMM3D, m is the product between m_w and m_h
1109 const unsigned int m = m_w * m_h;
1110
1111 // Set the tensor shapes for LHS and RHS matrices
1112 const TensorShape lhs_shape(k, m, batch_size);
1113 const TensorShape rhs_shape(n, k, batch_size);
1114
1115 _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, m_h, data_type);
1116 _reference = compute_reference(lhs_shape, rhs_shape, m_h, data_type);
1117 }
1118
1119 protected:
1120 template <typename U>
1121 void fill(U &&tensor, int i)
1122 {
1123 switch(tensor.data_type())
1124 {
1125 case DataType::QASYMM8:
1126 {
1127 // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
1128 std::uniform_int_distribution<> distribution(1, 254);
1129 library->fill(tensor, distribution, i);
1130 }
1131 break;
1132 case DataType::QASYMM8_SIGNED:
1133 {
1134 std::uniform_int_distribution<> distribution(-127, 126);
1135 library->fill(tensor, distribution, i);
1136 }
1137 break;
1138 default:
1139 ARM_COMPUTE_ERROR("Unsupported data type");
1140 }
1141 }
1142
1143 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, unsigned int m_h,
1144 DataType data_type)
1145 {
1146 // Create tensors
1147 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1148 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1149 TensorType lhs_reshaped;
1150 TensorType rhs_reshaped;
1151 TensorType dst;
1152
1153 const unsigned int M = lhs_shape[1];
1154 const unsigned int N = rhs_shape[0];
1155 const unsigned int K = lhs_shape[0];
1156
1157 // The output tensor will be auto-initialized within the function
1158
1159 // Create and configure function
1160 ReshapeLHSOperatorType reshape_lhs;
1161 ReshapeRHSOperatorType reshape_rhs;
1162 GEMMFunctionType gemm;
1163 reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info);
1164 reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1165 gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));
1166
1167 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1168 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1169
1170 add_padding_x({ &lhs, &rhs, &lhs_reshaped, &rhs_reshaped, &dst });
1171
1172 // Allocate tensors
1173 lhs.allocator()->allocate();
1174 rhs.allocator()->allocate();
1175 lhs_reshaped.allocator()->allocate();
1176 rhs_reshaped.allocator()->allocate();
1177 dst.allocator()->allocate();
1178
1179 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1180 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1181 ARM_COMPUTE_ASSERT(!lhs_reshaped.info()->is_resizable());
1182 ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1183 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1184
1185 // Fill tensors
1186 fill(AccessorType(lhs), 0);
1187 fill(AccessorType(rhs), 1);
1188
1189 // Compute GEMM
1190 ITensorPack reshape_lhs_pack = { { ACL_SRC, &lhs }, { ACL_DST, &lhs_reshaped } };
1191 reshape_lhs.run(reshape_lhs_pack);
1192 ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1193 reshape_rhs.run(reshape_rhs_pack);
1194 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs_reshaped }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
1195 gemm.run(gemm_pack);
1196
1197 return dst;
1198 }
1199
1200 SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, unsigned int m_h, DataType data_type)
1201 {
1202 TensorShape dst_shape = lhs_shape;
1203 dst_shape.set(0, rhs_shape[0]);
1204 dst_shape.set(1, lhs_shape[1] / m_h);
1205 dst_shape.set(2, m_h);
1206 dst_shape.set(3, lhs_shape[2]);
1207
1208 switch(data_type)
1209 {
1210 case DataType::QASYMM8:
1211 {
1212 // Create reference
1213 SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
1214 SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
1215
1216 // Fill reference
1217 fill(lhs, 0);
1218 fill(rhs, 1);
1219
1220 return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
1221 }
1222 case DataType::QASYMM8_SIGNED:
1223 {
1224 // Create reference
1225 SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
1226 SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };
1227
1228 // Fill reference
1229 fill(lhs, 0);
1230 fill(rhs, 1);
1231
1232 return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
1233 }
1234 default:
1235 ARM_COMPUTE_ERROR("Unsupported data type");
1236 }
1237 }
1238
1239 TensorType _target{};
1240 SimpleTensor<int32_t> _reference{};
1241 };
1242
1243 template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
1244 class GEMMLowpMatrixMultiplyReshapedOnlyRHSValidationFixture : public framework::Fixture
1245 {
1246 public:
1247 template <typename...>
1248 void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
1249 unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type)
1250 {
1251 GEMMLHSMatrixInfo lhs_info;
1252 lhs_info.m0 = m0;
1253 lhs_info.k0 = k0;
1254
1255 GEMMRHSMatrixInfo rhs_info;
1256 rhs_info.n0 = n0;
1257 rhs_info.k0 = k0;
1258 rhs_info.h0 = h0;
1259 rhs_info.interleave = interleave_rhs;
1260 rhs_info.transpose = transpose_rhs;
1261
1262 // Set the tensor shapes for LHS and RHS matrices
1263 const TensorShape lhs_shape(k, m, batch_size);
1264 const TensorShape rhs_shape(n, k, batch_size);
1265
1266 _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type);
1267 _reference = compute_reference(lhs_shape, rhs_shape, data_type);
1268 }
1269
1270 protected:
1271 template <typename U>
1272 void fill(U &&tensor, int i)
1273 {
1274 switch(tensor.data_type())
1275 {
1276 case DataType::QASYMM8:
1277 {
1278 // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
1279 std::uniform_int_distribution<> distribution(1, 254);
1280 library->fill(tensor, distribution, i);
1281 }
1282 break;
1283 case DataType::QASYMM8_SIGNED:
1284 {
1285 std::uniform_int_distribution<> distribution(-127, 126);
1286 library->fill(tensor, distribution, i);
1287 }
1288 break;
1289 default:
1290 ARM_COMPUTE_ERROR("Unsupported data type");
1291 }
1292 }
1293
1294 TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info,
1295 const GEMMRHSMatrixInfo &rhs_info, DataType data_type)
1296 {
1297 // Create tensors
1298 TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
1299 TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
1300 TensorType rhs_reshaped;
1301 TensorType dst;
1302
1303 const unsigned int M = lhs_shape[1];
1304 const unsigned int N = rhs_shape[0];
1305 const unsigned int K = lhs_shape[0];
1306
1307 GEMMKernelInfo gemm_info;
1308 gemm_info.m = M;
1309 gemm_info.n = N;
1310 gemm_info.k = K;
1311 gemm_info.lhs_info = lhs_info;
1312 gemm_info.rhs_info = rhs_info;
1313 // The output tensor will be auto-initialized within the function
1314
1315 // Create and configure function
1316 ReshapeRHSOperatorType reshape_rhs;
1317 GEMMFunctionType gemm;
1318 reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
1319 gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info);
1320
1321 ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
1322 ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
1323
1324 add_padding_x({ &lhs, &rhs, &rhs_reshaped, &dst });
1325
1326 // Allocate tensors
1327 lhs.allocator()->allocate();
1328 rhs.allocator()->allocate();
1329 rhs_reshaped.allocator()->allocate();
1330 dst.allocator()->allocate();
1331
1332 ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
1333 ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
1334 ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
1335 ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
1336
1337 // Fill tensors
1338 fill(AccessorType(lhs), 0);
1339 fill(AccessorType(rhs), 1);
1340
1341 // Compute GEMM
1342 ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
1343 reshape_rhs.run(reshape_rhs_pack);
1344 ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
1345 gemm.run(gemm_pack);
1346
1347 return dst;
1348 }
1349
1350 SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type)
1351 {
1352 TensorShape dst_shape = lhs_shape;
1353 dst_shape[0] = rhs_shape[0];
1354 dst_shape[1] = lhs_shape[1];
1355
1356 if(data_type == DataType::QASYMM8)
1357 {
1358 // Create reference
1359 SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
1360 SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
1361
1362 // Fill reference
1363 fill(lhs, 0);
1364 fill(rhs, 1);
1365
1366 return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
1367 }
1368 else
1369 {
1370 // Create reference
1371 SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
1372 SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };
1373
1374 // Fill reference
1375 fill(lhs, 0);
1376 fill(rhs, 1);
1377
1378 return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
1379 }
1380 }
1381
1382 TensorType _target{};
1383 SimpleTensor<int32_t> _reference{};
1384 };
1385
1386 template <typename T, typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType, typename ReduceOperation, typename CastOperation>
1387 class GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture : public framework::Fixture
1388 {
1389 public:
1390 template <typename...>
setup(unsigned int m,unsigned int n,unsigned int k,unsigned int batch_size,unsigned int m0,unsigned int n0,unsigned int k0,unsigned int h0,bool interleave_rhs,bool transpose_rhs,bool broadcast_bias,DataType data_type)1391 void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
1392 unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, bool broadcast_bias, DataType data_type)
1393 {
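        // Configure a fixed-point requantization output stage with a single multiplier/shift pair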
        GEMMLowpOutputStageInfo output_stage;
        output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
        output_stage.output_data_type = data_type;
        output_stage.gemmlowp_multipliers = std::vector<int32_t> { 1 };
        output_stage.gemmlowp_shifts = std::vector<int32_t> { 1 };
        output_stage.gemmlowp_multipliers[0] = 1;
        output_stage.gemmlowp_shifts[0] = 1;
        output_stage.gemmlowp_offset = 0;
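        // Decompose the reference scale into the fixed-point multiplier and right-shift expected by the output stage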
        constexpr float scale = 0.001f;
        quantization::calculate_quantized_multiplier(scale, &output_stage.gemmlowp_multipliers[0], &output_stage.gemmlowp_shifts[0]);
        output_stage.gemmlowp_min_bound = -100;
        output_stage.gemmlowp_max_bound = 100;

        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.h0 = h0;
        rhs_info.interleave = interleave_rhs;
        rhs_info.transpose = transpose_rhs;

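        // Non-zero quantization offsets so the offset-contribution handling is exercised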
        int a_offset = 1;
        int b_offset = 1;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);
        const TensorShape bias_shape(n,
                                     broadcast_bias ? 1 : m,
                                     broadcast_bias ? 1 : batch_size);

        _target = compute_target(lhs_shape, rhs_shape, bias_shape, lhs_info, rhs_info, data_type, output_stage, a_offset, b_offset);
        if(gemm_validated == true)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, bias_shape, data_type, output_stage, a_offset, b_offset);
        }
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        switch(tensor.data_type())
        {
            case DataType::QASYMM8:
            {
                // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
                std::uniform_int_distribution<> distribution(1, 254);
                library->fill(tensor, distribution, i);
            }
            break;
            case DataType::QASYMM8_SIGNED:
            {
                std::uniform_int_distribution<> distribution(-127, 126);
                library->fill(tensor, distribution, i);
            }
            break;
            case DataType::S32:
            {
                std::uniform_int_distribution<> distribution(-10000, 10000);
                library->fill(tensor, distribution, i);
            }
            break;
            default:
                ARM_COMPUTE_ERROR("Unsupported data type");
        }
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, const GEMMLHSMatrixInfo &lhs_info,
                              const GEMMRHSMatrixInfo &rhs_info, DataType data_type, GEMMLowpOutputStageInfo output_stage, const int a_offset, const int b_offset)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, a_offset));
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, b_offset));
        TensorType bias = create_tensor<TensorType>(bias_shape, DataType::S32, 1);
        TensorType dst;
        TensorType rhs_reshaped;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];

        // Tensors for precomputing sum of lhs rows / rhs columns
        TensorType vec_sum_rows = create_tensor<TensorType>(TensorShape(M, 1, lhs_shape[2]), DataType::S32, 1);
        TensorType vec_sum_cols = create_tensor<TensorType>(TensorShape(N, 1, rhs_shape[2]), DataType::S32, 1);

        GEMMKernelInfo gemm_info;
        gemm_info.m = M;
        gemm_info.n = N;
        gemm_info.k = K;
        gemm_info.lhs_info = lhs_info;
        gemm_info.rhs_info = rhs_info;
        gemm_info.output_stage = output_stage;
        gemm_info.a_offset = a_offset;
        gemm_info.b_offset = b_offset;
        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMFunctionType gemm;
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);

        // If GEMM is not validated, do not try to run. The validation will check
        // if the platform supports this extension. If not, the test will be skipped.
        // If it is supported, the test will fail anyway because target and reference
        // will not match.
        gemm_validated = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, vec_sum_cols.info(), vec_sum_rows.info(), bias.info()));
        if(gemm_validated == true)
        {
            gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, vec_sum_cols.info(), vec_sum_rows.info(), bias.info());

            ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
            ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());
            ARM_COMPUTE_ASSERT(bias.info()->is_resizable());

            // Allocate tensors
            lhs.allocator()->allocate();
            rhs.allocator()->allocate();
            rhs_reshaped.allocator()->allocate();
            bias.allocator()->allocate();
            vec_sum_cols.allocator()->allocate();
            vec_sum_rows.allocator()->allocate();
            dst.allocator()->allocate();

            ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
            ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
            ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
            ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
            ARM_COMPUTE_ASSERT(!vec_sum_cols.info()->is_resizable());
            ARM_COMPUTE_ASSERT(!vec_sum_rows.info()->is_resizable());
            ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

            // Fill tensors
            fill(AccessorType(lhs), 0);
            fill(AccessorType(rhs), 1);
            fill(AccessorType(bias), 2);

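            // Cast the quantized inputs to S32 so the reduction operators below can compute the row/column sums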
            TensorType lhs_32 = create_tensor<TensorType>(lhs_shape, DataType::S32, 1);
            TensorType rhs_32 = create_tensor<TensorType>(rhs_shape, DataType::S32, 1);
            CastOperation cast_lhs;
            CastOperation cast_rhs;
            cast_lhs.configure(&lhs, &lhs_32, ConvertPolicy::SATURATE);
            cast_rhs.configure(&rhs, &rhs_32, ConvertPolicy::SATURATE);
            lhs_32.allocator()->allocate();
            rhs_32.allocator()->allocate();
            cast_lhs.run();
            cast_rhs.run();

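            // Precompute the LHS row sums and the RHS column sums required by the offset contribution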
            ReduceOperation lhs_sum_rows;
            ReduceOperation rhs_sum_cols;

            lhs_sum_rows.configure(&lhs_32, &vec_sum_rows, 0, ReductionOperation::SUM, false);
            rhs_sum_cols.configure(&rhs_32, &vec_sum_cols, 1, ReductionOperation::SUM);

            lhs_sum_rows.run();
            rhs_sum_cols.run();

            // Compute GEMM
            ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
            reshape_rhs.run(reshape_rhs_pack);
            ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_SRC_2, &bias }, { ACL_DST, &dst }, { ACL_VEC_COL_SUM, &vec_sum_cols }, { ACL_VEC_ROW_SUM, &vec_sum_rows } });
            gemm.run(gemm_pack);
        }

        return dst;
    }

    SimpleTensor<T> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const TensorShape &bias_shape, DataType data_type, GEMMLowpOutputStageInfo output_stage,
                                      const int a_offset, const int b_offset)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor<T> lhs{ lhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, a_offset) };
        SimpleTensor<T> rhs{ rhs_shape, data_type, 1, QuantizationInfo(1.0f / 255, b_offset) };
        SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
        SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 };
        SimpleTensor<T> dst_final{ dst_shape, data_type, 1 };

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);
        fill(bias, 2);

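        // Reference pipeline: raw int32 GEMMLowp core multiply followed by fixed-point requantization with the bias added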
        dst = reference::gemmlowp_matrix_multiply_core<int32_t, T>(lhs, rhs, dst_shape, a_offset, b_offset);
        dst_final = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, T>(dst, bias,
                                                                                      output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound);
        return dst_final;
    }

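    // Set to false by compute_target() when validate() rejects the configuration (e.g. the MMUL extension is unsupported); the reference computation is then skipped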
    bool gemm_validated = true;
    TensorType _target{};
    SimpleTensor<T> _reference{};
};

template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
class GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
               unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.h0 = h0;
        rhs_info.interleave = interleave_rhs;
        rhs_info.transpose = transpose_rhs;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);

        _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, data_type);
        if(gemm_validated == true)
        {
            _reference = compute_reference(lhs_shape, rhs_shape, data_type);
        }
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        switch(tensor.data_type())
        {
            case DataType::QASYMM8:
            {
                // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
                std::uniform_int_distribution<> distribution(1, 254);
                library->fill(tensor, distribution, i);
            }
            break;
            case DataType::QASYMM8_SIGNED:
            {
                std::uniform_int_distribution<> distribution(-127, 126);
                library->fill(tensor, distribution, i);
            }
            break;
            default:
                ARM_COMPUTE_ERROR("Unsupported data type");
        }
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info,
                              const GEMMRHSMatrixInfo &rhs_info, DataType data_type)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];

        GEMMKernelInfo gemm_info;
        gemm_info.m = M;
        gemm_info.n = N;
        gemm_info.k = K;
        gemm_info.lhs_info = lhs_info;
        gemm_info.rhs_info = rhs_info;
        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMFunctionType gemm;
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);

        // If GEMM is not validated, do not try to run. The validation will check
        // if the platform supports this extension. If not, the test will be skipped.
        // If it is supported, the test will fail anyway because target and reference
        // will not match.
        gemm_validated = bool(gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, nullptr, nullptr, nullptr));
        if(gemm_validated == true)
        {
            gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, nullptr, nullptr, nullptr);

            ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
            ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());

            // Allocate tensors
            lhs.allocator()->allocate();
            rhs.allocator()->allocate();
            rhs_reshaped.allocator()->allocate();
            dst.allocator()->allocate();

            ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
            ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
            ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
            ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

            // Fill tensors
            fill(AccessorType(lhs), 0);
            fill(AccessorType(rhs), 1);

            // Compute GEMM
            ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
            reshape_rhs.run(reshape_rhs_pack);
            ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
            gemm.run(gemm_pack);
        }

        return dst;
    }

    SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, DataType data_type)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        if(data_type == DataType::QASYMM8)
        {
            // Create reference
            SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
            SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };
            SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 };

            // Fill reference
            fill(lhs, 0);
            fill(rhs, 1);

            return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
        }
        else
        {
            // Create reference
            SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
            SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };
            SimpleTensor<int32_t> dst{ dst_shape, DataType::S32, 1 };

            // Fill reference
            fill(lhs, 0);
            fill(rhs, 1);

            return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
        }
    }

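    // Cleared by compute_target() when validate() rejects the configuration, so the reference computation is skipped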
    bool gemm_validated = true;
    TensorType _target{};
    SimpleTensor<int32_t> _reference{};
};

template <typename TensorType, typename AccessorType, typename ReshapeRHSOperatorType, typename GEMMFunctionType>
class GEMMLowpMatrixMultiplyReshapedOnlyRHS3DValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0,
               unsigned int k0, unsigned int h0, bool interleave_rhs, bool transpose_rhs, DataType data_type)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;
        rhs_info.h0 = h0;
        rhs_info.interleave = interleave_rhs;
        rhs_info.transpose = transpose_rhs;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);

        _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, m_h, data_type);
        _reference = compute_reference(lhs_shape, rhs_shape, m_h, data_type);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        switch(tensor.data_type())
        {
            case DataType::QASYMM8:
            {
                // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
                std::uniform_int_distribution<> distribution(1, 254);
                library->fill(tensor, distribution, i);
            }
            break;
            case DataType::QASYMM8_SIGNED:
            {
                std::uniform_int_distribution<> distribution(-127, 126);
                library->fill(tensor, distribution, i);
            }
            break;
            default:
                ARM_COMPUTE_ERROR("Unsupported data type");
        }
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info,
                              const GEMMRHSMatrixInfo &rhs_info, unsigned int m_h, DataType data_type)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, data_type, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, data_type, 1);
        TensorType rhs_reshaped;
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];

        GEMMKernelInfo gemm_info;
        gemm_info.m = M;
        gemm_info.n = N;
        gemm_info.k = K;
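        // Reinterpret the output as 3D: the M rows are split into m_h slices of height M / m_h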
        gemm_info.depth_output_gemm3d = m_h;
        gemm_info.lhs_info = lhs_info;
        gemm_info.rhs_info = rhs_info;
        // The output tensor will be auto-initialized within the function

        // Create and configure function
        ReshapeRHSOperatorType reshape_rhs;
        GEMMFunctionType gemm;
        reshape_rhs.configure(rhs.info(), rhs_reshaped.info(), rhs_info);
        gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info);

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &rhs_reshaped, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        rhs_reshaped.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs_reshaped.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);

        // Compute GEMM
        ITensorPack reshape_rhs_pack = { { ACL_SRC, &rhs }, { ACL_DST, &rhs_reshaped } };
        reshape_rhs.run(reshape_rhs_pack);
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs_reshaped }, { ACL_DST, &dst } });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, unsigned int m_h, DataType data_type)
    {
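        // The reference output is laid out as a 4D tensor: N x (M / m_h) x m_h x batches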
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        if(data_type == DataType::QASYMM8)
        {
            // Create reference
            SimpleTensor<uint8_t> lhs{ lhs_shape, data_type, 1 };
            SimpleTensor<uint8_t> rhs{ rhs_shape, data_type, 1 };

            // Fill reference
            fill(lhs, 0);
            fill(rhs, 1);

            return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
        }
        else
        {
            // Create reference
            SimpleTensor<int8_t> lhs{ lhs_shape, data_type, 1 };
            SimpleTensor<int8_t> rhs{ rhs_shape, data_type, 1 };

            // Fill reference
            fill(lhs, 0);
            fill(rhs, 1);

            return reference::gemmlowp_matrix_multiply_core<int32_t, int8_t>(lhs, rhs, dst_shape, 0, 0);
        }
    }

    TensorType _target{};
    SimpleTensor<int32_t> _reference{};
};

template <typename TensorType, typename AccessorType, typename GEMMFunctionType>
class GEMMLowpMatrixMultiplyNativeValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);

        _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info);
        _reference = compute_reference(lhs_shape, rhs_shape);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
        std::uniform_int_distribution<> distribution(1, 254);
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, DataType::QASYMM8, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, DataType::QASYMM8, 1);
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        GEMMFunctionType gemm;
        gemm.configure(lhs.info(), rhs.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K));

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_DST, &dst } });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape)
    {
        TensorShape dst_shape = lhs_shape;
        dst_shape[0] = rhs_shape[0];
        dst_shape[1] = lhs_shape[1];

        // Create reference
        SimpleTensor<uint8_t> lhs{ lhs_shape, DataType::QASYMM8, 1 };
        SimpleTensor<uint8_t> rhs{ rhs_shape, DataType::QASYMM8, 1 };

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);

        return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
    }

    TensorType _target{};
    SimpleTensor<int32_t> _reference{};
};

template <typename TensorType, typename AccessorType, typename GEMMFunctionType>
class GEMMLowpMatrixMultiplyNative3DValidationFixture : public framework::Fixture
{
public:
    template <typename...>
    void setup(unsigned int m_w, unsigned int m_h, unsigned int n, unsigned int k, unsigned int batch_size, unsigned int m0, unsigned int n0, unsigned int k0)
    {
        GEMMLHSMatrixInfo lhs_info;
        lhs_info.m0 = m0;
        lhs_info.k0 = k0;

        GEMMRHSMatrixInfo rhs_info;
        rhs_info.n0 = n0;
        rhs_info.k0 = k0;

        // In case of GEMM3D, m is the product between m_w and m_h
        const unsigned int m = m_w * m_h;

        // Set the tensor shapes for LHS and RHS matrices
        const TensorShape lhs_shape(k, m, batch_size);
        const TensorShape rhs_shape(n, k, batch_size);

        _target = compute_target(lhs_shape, rhs_shape, lhs_info, rhs_info, m_h);
        _reference = compute_reference(lhs_shape, rhs_shape, m_h);
    }

protected:
    template <typename U>
    void fill(U &&tensor, int i)
    {
        // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path
        std::uniform_int_distribution<> distribution(1, 254);
        library->fill(tensor, distribution, i);
    }

    TensorType compute_target(const TensorShape &lhs_shape, const TensorShape &rhs_shape, const GEMMLHSMatrixInfo &lhs_info, const GEMMRHSMatrixInfo &rhs_info, unsigned int m_h)
    {
        // Create tensors
        TensorType lhs = create_tensor<TensorType>(lhs_shape, DataType::QASYMM8, 1);
        TensorType rhs = create_tensor<TensorType>(rhs_shape, DataType::QASYMM8, 1);
        TensorType dst;

        const unsigned int M = lhs_shape[1];
        const unsigned int N = rhs_shape[0];
        const unsigned int K = lhs_shape[0];

        // The output tensor will be auto-initialized within the function

        // Create and configure function
        GEMMFunctionType gemm;
        gemm.configure(lhs.info(), rhs.info(), dst.info(), lhs_info, rhs_info, GEMMReshapeInfo(M, N, K, 1, 1, m_h));

        ARM_COMPUTE_ASSERT(lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(rhs.info()->is_resizable());

        add_padding_x({ &lhs, &rhs, &dst });

        // Allocate tensors
        lhs.allocator()->allocate();
        rhs.allocator()->allocate();
        dst.allocator()->allocate();

        ARM_COMPUTE_ASSERT(!lhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!rhs.info()->is_resizable());
        ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());

        // Fill tensors
        fill(AccessorType(lhs), 0);
        fill(AccessorType(rhs), 1);

        // Compute GEMM
        ITensorPack gemm_pack({ { ACL_SRC_0, &lhs }, { ACL_SRC_1, &rhs }, { ACL_DST, &dst } });
        gemm.run(gemm_pack);

        return dst;
    }

    SimpleTensor<int32_t> compute_reference(const TensorShape &lhs_shape, const TensorShape &rhs_shape, unsigned int m_h)
    {
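        // As in the reshaped-only-RHS 3D fixture, the reference output is a 4D tensor: N x (M / m_h) x m_h x batches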
        TensorShape dst_shape = lhs_shape;
        dst_shape.set(0, rhs_shape[0]);
        dst_shape.set(1, lhs_shape[1] / m_h);
        dst_shape.set(2, m_h);
        dst_shape.set(3, lhs_shape[2]);

        // Create reference
        SimpleTensor<uint8_t> lhs{ lhs_shape, DataType::QASYMM8, 1 };
        SimpleTensor<uint8_t> rhs{ rhs_shape, DataType::QASYMM8, 1 };

        // Fill reference
        fill(lhs, 0);
        fill(rhs, 1);

        return reference::gemmlowp_matrix_multiply_core<int32_t, uint8_t>(lhs, rhs, dst_shape, 0, 0);
    }

    TensorType _target{};
    SimpleTensor<int32_t> _reference{};
};
} // namespace validation
} // namespace test
} // namespace arm_compute
#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */