• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #pragma once
10 
11 #include <gtest/gtest.h>
12 
13 #include <algorithm>
14 #include <cassert>
15 #include <cmath>
16 #include <cstddef>
17 #include <cstdlib>
18 #include <functional>
19 #include <limits>
20 #include <random>
21 #include <vector>
22 
23 #include <fp16.h>
24 
25 #include <xnnpack.h>
26 #include <xnnpack/AlignedAllocator.h>
27 #include <xnnpack/params-init.h>
28 #include <xnnpack/params.h>
29 #include <xnnpack/requantization.h>
30 
31 
32 class GAvgPoolMicrokernelTester {
33  public:
rows(size_t rows)34   inline GAvgPoolMicrokernelTester& rows(size_t rows) {
35     assert(rows != 0);
36     this->rows_ = rows;
37     return *this;
38   }
39 
rows()40   inline size_t rows() const {
41     return this->rows_;
42   }
43 
channels(size_t channels)44   inline GAvgPoolMicrokernelTester& channels(size_t channels) {
45     assert(channels != 0);
46     this->channels_ = channels;
47     return *this;
48   }
49 
channels()50   inline size_t channels() const {
51     return this->channels_;
52   }
53 
channel_tile(size_t channel_tile)54   inline GAvgPoolMicrokernelTester& channel_tile(size_t channel_tile) {
55     assert(channel_tile != 0);
56     this->channel_tile_ = channel_tile;
57     return *this;
58   }
59 
channel_tile()60   inline size_t channel_tile() const {
61     return this->channel_tile_;
62   }
63 
input_stride(size_t input_stride)64   inline GAvgPoolMicrokernelTester& input_stride(size_t input_stride) {
65     assert(input_stride != 0);
66     this->input_stride_ = input_stride;
67     return *this;
68   }
69 
input_stride()70   inline size_t input_stride() const {
71     if (this->input_stride_ == 0) {
72       return channels();
73     } else {
74       assert(this->input_stride_ >= channels());
75       return this->input_stride_;
76     }
77   }
78 
input_scale(float input_scale)79   inline GAvgPoolMicrokernelTester& input_scale(float input_scale) {
80     assert(input_scale > 0.0f);
81     assert(std::isnormal(input_scale));
82     this->input_scale_ = input_scale;
83     return *this;
84   }
85 
input_scale()86   inline float input_scale() const {
87     return this->input_scale_;
88   }
89 
input_zero_point(uint8_t input_zero_point)90   inline GAvgPoolMicrokernelTester& input_zero_point(uint8_t input_zero_point) {
91     this->input_zero_point_ = input_zero_point;
92     return *this;
93   }
94 
input_zero_point()95   inline uint8_t input_zero_point() const {
96     return this->input_zero_point_;
97   }
98 
output_scale(float output_scale)99   inline GAvgPoolMicrokernelTester& output_scale(float output_scale) {
100     assert(output_scale > 0.0f);
101     assert(std::isnormal(output_scale));
102     this->output_scale_ = output_scale;
103     return *this;
104   }
105 
output_scale()106   inline float output_scale() const {
107     return this->output_scale_;
108   }
109 
output_zero_point(uint8_t output_zero_point)110   inline GAvgPoolMicrokernelTester& output_zero_point(uint8_t output_zero_point) {
111     this->output_zero_point_ = output_zero_point;
112     return *this;
113   }
114 
output_zero_point()115   inline uint8_t output_zero_point() const {
116     return this->output_zero_point_;
117   }
118 
qmin(uint8_t qmin)119   inline GAvgPoolMicrokernelTester& qmin(uint8_t qmin) {
120     this->qmin_ = qmin;
121     return *this;
122   }
123 
qmin()124   inline uint8_t qmin() const {
125     return this->qmin_;
126   }
127 
qmax(uint8_t qmax)128   inline GAvgPoolMicrokernelTester& qmax(uint8_t qmax) {
129     this->qmax_ = qmax;
130     return *this;
131   }
132 
qmax()133   inline uint8_t qmax() const {
134     return this->qmax_;
135   }
136 
iterations(size_t iterations)137   inline GAvgPoolMicrokernelTester& iterations(size_t iterations) {
138     this->iterations_ = iterations;
139     return *this;
140   }
141 
iterations()142   inline size_t iterations() const {
143     return this->iterations_;
144   }
145 
Test(xnn_qu8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_qu8_avgpool_minmax_params_fn init_params,xnn_qu8_requantize_fn requantize)146   void Test(
147       xnn_qu8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,
148       xnn_init_qu8_avgpool_minmax_params_fn init_params,
149       xnn_qu8_requantize_fn requantize) const
150   {
151     std::random_device random_device;
152     auto rng = std::mt19937(random_device());
153     auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
154 
155     std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
156       (rows() - 1) * input_stride() + channels());
157     std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
158     std::vector<uint8_t> output(channels());
159     std::vector<uint8_t> output_ref(channels());
160     std::vector<float> output_fp(channels());
161     std::vector<int32_t> accumulators(channels());
162     for (size_t iteration = 0; iteration < iterations(); iteration++) {
163       std::generate(input.begin(), input.end(), std::ref(u8rng));
164       std::fill(output.begin(), output.end(), 0xA5);
165 
166       // Prepare parameters.
167       union xnn_qu8_avgpool_minmax_params params;
168       init_params(
169         &params,
170         -int32_t(input_zero_point()) * int32_t(rows()),
171         input_scale() / (output_scale() * float(rows())),
172         output_zero_point(), qmin(), qmax());
173 
174       // Compute reference results.
175       for (size_t c = 0; c < channels(); c++) {
176         int32_t acc = 0;
177         for (size_t n = 0; n < rows(); n++) {
178           acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point());
179         }
180         accumulators[c] = acc;
181         output_ref[c] = requantize(
182           acc, input_scale() / (output_scale() * float(rows())), output_zero_point(), qmin(), qmax());
183         output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point());
184         output_fp[c] = std::min<float>(output_fp[c], float(qmax()));
185         output_fp[c] = std::max<float>(output_fp[c], float(qmin()));
186       }
187 
188       // Call optimized micro-kernel.
189       gavgpool_minmax(rows(), channels(),
190         input.data(), input_stride() * sizeof(uint8_t),
191         zero.data(),
192         output.data(),
193         &params);
194 
195       // Verify results.
196       for (size_t c = 0; c < channels(); c++) {
197         ASSERT_LE(uint32_t(output[c]), uint32_t(qmax()))
198           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
199         ASSERT_GE(uint32_t(output[c]), uint32_t(qmin()))
200           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
201         ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f)
202           << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
203           << ", acc = " << accumulators[c];
204         ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c]))
205           << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
206           << ", acc = " << accumulators[c];
207       }
208     }
209   }
210 
Test(xnn_qu8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_qu8_avgpool_minmax_params_fn init_params,xnn_qu8_requantize_fn requantize)211   void Test(
212       xnn_qu8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,
213       xnn_init_qu8_avgpool_minmax_params_fn init_params,
214       xnn_qu8_requantize_fn requantize) const
215   {
216     std::random_device random_device;
217     auto rng = std::mt19937(random_device());
218     auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
219 
220     std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
221       (rows() - 1) * input_stride() + channels());
222     std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
223     std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
224     std::vector<uint8_t> output(channels());
225     std::vector<uint8_t> output_ref(channels());
226     std::vector<float> output_fp(channels());
227     std::vector<int32_t> accumulators(channels());
228     for (size_t iteration = 0; iteration < iterations(); iteration++) {
229       std::generate(input.begin(), input.end(), std::ref(u8rng));
230       std::fill(output.begin(), output.end(), 0xA5);
231 
232       // Prepare parameters.
233       union xnn_qu8_avgpool_minmax_params params;
234       init_params(
235         &params,
236         -int32_t(input_zero_point()) * int32_t(rows()),
237         input_scale() / (output_scale() * float(rows())),
238         output_zero_point(), qmin(), qmax());
239 
240       // Compute reference results.
241       for (size_t c = 0; c < channels(); c++) {
242         int32_t acc = 0;
243         for (size_t n = 0; n < rows(); n++) {
244           acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point());
245         }
246 
247         accumulators[c] = acc;
248         output_ref[c] = requantize(
249           acc, input_scale() / (output_scale() * float(rows())), output_zero_point(), qmin(), qmax());
250         output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point());
251         output_fp[c] = std::min<float>(output_fp[c], float(qmax()));
252         output_fp[c] = std::max<float>(output_fp[c], float(qmin()));
253       }
254 
255       // Call optimized micro-kernel.
256       gavgpool_minmax(rows(), channels(),
257         input.data(), input_stride() * sizeof(uint8_t),
258         zero.data(),
259         buffer.data(),
260         output.data(),
261         &params);
262 
263       // Verify results.
264       for (size_t c = 0; c < channels(); c++) {
265         ASSERT_LE(uint32_t(output[c]), uint32_t(qmax()))
266           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
267         ASSERT_GE(uint32_t(output[c]), uint32_t(qmin()))
268           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
269         ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f)
270           << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
271           << ", acc = " << accumulators[c];
272         ASSERT_EQ(uint32_t(output_ref[c]), uint32_t(output[c]))
273           << "at position " << c << ", rows = " << rows() << ", channels = " << channels()
274           << ", acc = " << accumulators[c];
275       }
276     }
277   }
278 
Test(xnn_qs8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_qs8_avgpool_minmax_params_fn init_params,xnn_qs8_requantize_fn requantize)279   void Test(
280       xnn_qs8_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,
281       xnn_init_qs8_avgpool_minmax_params_fn init_params,
282       xnn_qs8_requantize_fn requantize) const
283   {
284     std::random_device random_device;
285     auto rng = std::mt19937(random_device());
286     auto i8rng = std::bind(
287       std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);
288 
289     std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
290       (rows() - 1) * input_stride() + channels());
291     std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
292     std::vector<int8_t> output(channels());
293     std::vector<int8_t> output_ref(channels());
294     std::vector<float> output_fp(channels());
295     std::vector<int32_t> accumulators(channels());
296     for (size_t iteration = 0; iteration < iterations(); iteration++) {
297       std::generate(input.begin(), input.end(), std::ref(i8rng));
298       std::fill(output.begin(), output.end(), 0xA5);
299 
300       // Prepare parameters.
301       union xnn_qs8_avgpool_minmax_params params;
302       init_params(
303         &params,
304         -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
305         input_scale() / (output_scale() * float(rows())),
306         int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
307 
308       // Compute reference results.
309       for (size_t c = 0; c < channels(); c++) {
310         int32_t acc = 0;
311         for (size_t n = 0; n < rows(); n++) {
312           acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point() - 0x80);
313         }
314         accumulators[c] = acc;
315         output_ref[c] = requantize(
316           acc, input_scale() / (output_scale() * float(rows())), int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
317         output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80);
318         output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80));
319         output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80));
320       }
321 
322       // Call optimized micro-kernel.
323       gavgpool_minmax(rows(), channels(),
324         input.data(), input_stride() * sizeof(int8_t),
325         zero.data(),
326         output.data(),
327         &params);
328 
329       // Verify results.
330       for (size_t c = 0; c < channels(); c++) {
331         ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80))
332           << "at channel " << c << " / " << channels() << ", rows = " << rows();
333         ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80))
334           << "at channel " << c << " / " << channels() << ", rows = " << rows();
335         ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f)
336           << "at channel " << c << " / " << channels() << ", rows = " << rows()
337           << ", accumulator = " << accumulators[c];
338         ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c]))
339           << "at channel " << c << " / " << channels() << ", rows = " << rows()
340           << ", accumulator = " << accumulators[c];
341       }
342     }
343   }
344 
Test(xnn_qs8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_qs8_avgpool_minmax_params_fn init_params,xnn_qs8_requantize_fn requantize)345   void Test(
346       xnn_qs8_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,
347       xnn_init_qs8_avgpool_minmax_params_fn init_params,
348       xnn_qs8_requantize_fn requantize) const
349   {
350     std::random_device random_device;
351     auto rng = std::mt19937(random_device());
352     auto i8rng = std::bind(
353       std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);
354 
355     std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
356       (rows() - 1) * input_stride() + channels());
357     std::vector<int32_t, AlignedAllocator<int32_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
358     std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
359     std::vector<int8_t> output(channels());
360     std::vector<int8_t> output_ref(channels());
361     std::vector<float> output_fp(channels());
362     std::vector<int32_t> accumulators(channels());
363     for (size_t iteration = 0; iteration < iterations(); iteration++) {
364       std::generate(input.begin(), input.end(), std::ref(i8rng));
365       std::fill(output.begin(), output.end(), 0xA5);
366 
367       // Prepare parameters.
368       union xnn_qs8_avgpool_minmax_params params;
369       init_params(
370         &params,
371         -int32_t(input_zero_point() - 0x80) * int32_t(rows()),
372         input_scale() / (output_scale() * float(rows())),
373         int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
374 
375       // Compute reference results.
376       for (size_t c = 0; c < channels(); c++) {
377         int32_t acc = 0;
378         for (size_t n = 0; n < rows(); n++) {
379           acc += int32_t(input[n * input_stride() + c]) - int32_t(input_zero_point() - 0x80);
380         }
381         accumulators[c] = acc;
382         output_ref[c] = requantize(
383           acc, input_scale() / (output_scale() * float(rows())), int8_t(output_zero_point() - 0x80), int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
384         output_fp[c] = float(acc) * (input_scale() / (output_scale() * float(rows()))) + float(output_zero_point() - 0x80);
385         output_fp[c] = std::min<float>(output_fp[c], float(qmax() - 0x80));
386         output_fp[c] = std::max<float>(output_fp[c], float(qmin() - 0x80));
387       }
388 
389       // Call optimized micro-kernel.
390       gavgpool_minmax(rows(), channels(),
391         input.data(), input_stride() * sizeof(int8_t),
392         zero.data(),
393         buffer.data(),
394         output.data(),
395         &params);
396 
397       // Verify results.
398       for (size_t c = 0; c < channels(); c++) {
399         ASSERT_LE(int32_t(output[c]), int32_t(qmax() - 0x80))
400           << "at channel " << c << " / " << channels() << ", rows = " << rows();
401         ASSERT_GE(int32_t(output[c]), int32_t(qmin() - 0x80))
402           << "at channel " << c << " / " << channels() << ", rows = " << rows();
403         ASSERT_NEAR(float(int32_t(output[c])), output_fp[c], 0.5f)
404           << "at channel " << c << " / " << channels() << ", rows = " << rows()
405           << ", accumulator = " << accumulators[c];
406         ASSERT_EQ(int32_t(output_ref[c]), int32_t(output[c]))
407           << "at channel " << c << " / " << channels() << ", rows = " << rows()
408           << ", accumulator = " << accumulators[c];
409       }
410     }
411   }
412 
Test(xnn_f16_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_f16_scaleminmax_params_fn init_params)413   void Test(xnn_f16_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, xnn_init_f16_scaleminmax_params_fn init_params) const {
414     std::random_device random_device;
415     auto rng = std::mt19937(random_device());
416     auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);
417     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
418 
419     std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
420     std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
421     std::vector<uint16_t> output(channels());
422     std::vector<float> output_ref(channels());
423 
424     std::fill(zero.begin(), zero.end(), 0);
425     for (size_t iteration = 0; iteration < iterations(); iteration++) {
426       std::generate(input.begin(), input.end(), std::ref(f16rng));
427       std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
428 
429       // Compute reference results, without clamping.
430       for (size_t c = 0; c < channels(); c++) {
431         float acc = 0.0f;
432         for (size_t n = 0; n < rows(); n++) {
433           acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]);
434         }
435         output_ref[c] = acc / float(rows());
436       }
437 
438       // Compute clamping parameters.
439       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
440       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
441       const float accumulated_range = accumulated_max - accumulated_min;
442       const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range));
443       const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range));
444 
445       // Clamp reference results.
446       for (float& output_values : output_ref) {
447         output_values = std::max(std::min(output_values, output_max), output_min);
448       }
449 
450       // Prepare parameters.
451       xnn_f16_scaleminmax_params params;
452       init_params(&params,
453         fp16_ieee_from_fp32_value(1.0f / float(rows())),
454         fp16_ieee_from_fp32_value(output_min),
455         fp16_ieee_from_fp32_value(output_max));
456 
457       // Call optimized micro-kernel.
458       gavgpool_minmax(rows(), channels(),
459         input.data(), input_stride() * sizeof(uint16_t),
460         zero.data(),
461         output.data(),
462         &params);
463 
464       // Verify results.
465       for (size_t c = 0; c < channels(); c++) {
466         ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max)
467           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
468         ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min)
469           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
470         ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::max(1.0e-4f, std::abs(output_ref[c]) * 1.0e-2f))
471           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
472       }
473     }
474   }
475 
Test(xnn_f16_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_f16_scaleminmax_params_fn init_params)476   void Test(xnn_f16_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, xnn_init_f16_scaleminmax_params_fn init_params) const {
477     std::random_device random_device;
478     auto rng = std::mt19937(random_device());
479     auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);
480     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
481 
482     std::vector<uint16_t> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
483     std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
484     std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
485     std::vector<uint16_t> output(channels());
486     std::vector<float> output_ref(channels());
487     for (size_t iteration = 0; iteration < iterations(); iteration++) {
488       std::generate(input.begin(), input.end(), std::ref(f16rng));
489       std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
490 
491       // Compute reference results, without clamping.
492       for (size_t c = 0; c < channels(); c++) {
493         float acc = 0.0f;
494         for (size_t n = 0; n < rows(); n++) {
495           acc += fp16_ieee_to_fp32_value(input[n * input_stride() + c]);
496         }
497         output_ref[c] = acc / float(rows());
498       }
499 
500       // Compute clamping parameters.
501       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
502       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
503       const float accumulated_range = accumulated_max - accumulated_min;
504       const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + float(qmin()) / 255.0f * accumulated_range));
505       const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range));
506 
507       // Prepare parameters.
508       xnn_f16_scaleminmax_params params;
509       init_params(&params,
510         fp16_ieee_from_fp32_value(1.0f / float(rows())),
511         fp16_ieee_from_fp32_value(output_min),
512         fp16_ieee_from_fp32_value(output_max));
513 
514       // Clamp reference results.
515       for (float& output_values : output_ref) {
516         output_values = std::max(std::min(output_values, output_max), output_min);
517       }
518 
519       // Call optimized micro-kernel.
520       gavgpool_minmax(rows(), channels(),
521         input.data(), input_stride() * sizeof(uint16_t),
522         zero.data(),
523         buffer.data(),
524         output.data(),
525         &params);
526 
527       // Verify results.
528       for (size_t c = 0; c < channels(); c++) {
529         ASSERT_LE(fp16_ieee_to_fp32_value(output[c]), output_max)
530           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
531         ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min)
532           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
533         ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::abs(output_ref[c]) * 1.0e-0f)
534           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
535       }
536     }
537   }
538 
Test(xnn_f32_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax,xnn_init_f32_scaleminmax_params_fn init_params)539   void Test(xnn_f32_gavgpool_minmax_unipass_ukernel_function gavgpool_minmax, xnn_init_f32_scaleminmax_params_fn init_params) const {
540     std::random_device random_device;
541     auto rng = std::mt19937(random_device());
542     auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);
543 
544     std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
545     std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
546     std::vector<float> output(channels());
547     std::vector<float> output_ref(channels());
548 
549     std::fill(zero.begin(), zero.end(), 0.0f);
550     for (size_t iteration = 0; iteration < iterations(); iteration++) {
551       std::generate(input.begin(), input.end(), std::ref(f32rng));
552       std::fill(output.begin(), output.end(), std::nanf(""));
553 
554       // Compute reference results, without clamping.
555       for (size_t c = 0; c < channels(); c++) {
556         float acc = 0.0f;
557         for (size_t n = 0; n < rows(); n++) {
558           acc += input[n * input_stride() + c];
559         }
560         output_ref[c] = acc / float(rows());
561       }
562 
563       // Compute clamping parameters.
564       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
565       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
566       const float accumulated_range = accumulated_max - accumulated_min;
567       const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
568       const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;
569 
570       // Clamp reference results.
571       for (float& output_values : output_ref) {
572         output_values = std::max(std::min(output_values, output_max), output_min);
573       }
574 
575       // Prepare parameters.
576       union xnn_f32_scaleminmax_params params;
577       init_params(&params, 1.0f / float(rows()), output_min, output_max);
578 
579       // Call optimized micro-kernel.
580       gavgpool_minmax(rows(), channels(),
581         input.data(), input_stride() * sizeof(float),
582         zero.data(),
583         output.data(),
584         &params);
585 
586       // Verify results.
587       for (size_t c = 0; c < channels(); c++) {
588         ASSERT_LE(output[c], output_max)
589           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
590         ASSERT_GE(output[c], output_min)
591           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
592         ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f)
593           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
594       }
595     }
596   }
597 
Test(xnn_f32_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax,xnn_init_f32_scaleminmax_params_fn init_params)598   void Test(xnn_f32_gavgpool_minmax_multipass_ukernel_function gavgpool_minmax, xnn_init_f32_scaleminmax_params_fn init_params) const {
599     std::random_device random_device;
600     auto rng = std::mt19937(random_device());
601     auto f32rng = std::bind(std::uniform_real_distribution<float>(), rng);
602 
603     std::vector<float> input((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(float));
604     std::vector<float, AlignedAllocator<float, 64>> buffer(channels() + XNN_EXTRA_BYTES / sizeof(float));
605     std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
606     std::vector<float> output(channels());
607     std::vector<float> output_ref(channels());
608     for (size_t iteration = 0; iteration < iterations(); iteration++) {
609       std::generate(input.begin(), input.end(), std::ref(f32rng));
610       std::fill(output.begin(), output.end(), std::nanf(""));
611 
612       // Compute reference results, without clamping.
613       for (size_t c = 0; c < channels(); c++) {
614         float acc = 0.0f;
615         for (size_t n = 0; n < rows(); n++) {
616           acc += input[n * input_stride() + c];
617         }
618         output_ref[c] = acc / float(rows());
619       }
620 
621       // Compute clamping parameters.
622       const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
623       const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
624       const float accumulated_range = accumulated_max - accumulated_min;
625       const float output_min = accumulated_min + float(qmin()) / 255.0f * accumulated_range;
626       const float output_max = accumulated_max - float(255 - qmax()) / 255.0f * accumulated_range;
627 
628       // Prepare parameters.
629       union xnn_f32_scaleminmax_params params;
630       init_params(&params, 1.0f / float(rows()), output_min, output_max);
631 
632       // Clamp reference results.
633       for (float& output_values : output_ref) {
634         output_values = std::max(std::min(output_values, output_max), output_min);
635       }
636 
637       // Call optimized micro-kernel.
638       gavgpool_minmax(rows(), channels(),
639         input.data(), input_stride() * sizeof(float),
640         zero.data(),
641         buffer.data(),
642         output.data(),
643         &params);
644 
645       // Verify results.
646       for (size_t c = 0; c < channels(); c++) {
647         ASSERT_LE(output[c], output_max)
648           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
649         ASSERT_GE(output[c], output_min)
650           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
651         ASSERT_NEAR(output[c], output_ref[c], std::abs(output_ref[c]) * 1.0e-6f)
652           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
653       }
654     }
655   }
656 
657  private:
658   size_t rows_{1};
659   size_t channels_{1};
660   size_t channel_tile_{1};
661   size_t input_stride_{0};
662   float input_scale_{1.25f};
663   float output_scale_{0.75f};
664   uint8_t input_zero_point_{121};
665   uint8_t output_zero_point_{133};
666   uint8_t qmin_{0};
667   uint8_t qmax_{255};
668   size_t iterations_{15};
669 };
670