• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/common.h>
#include <xnnpack/indirection.h>
#include <xnnpack/log.h>
#include <xnnpack/math.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/operator.h>
#include <xnnpack/params.h>
28 
29 
// Output extent for TensorFlow SAME padding: ceil(input / stride).
// Padding is derived later from this result (see setup), matching TF semantics.
static inline size_t compute_output_dimension_with_tf_same_padding(
    size_t input_dimension, size_t stride_dimension)
{
  const size_t output_dimension = divide_round_up(input_dimension, stride_dimension);
  return output_dimension;
}
36 
// Creates a 2D average pooling operator for NHWC layout with quantized uint8 data.
//
// Validates all parameters, allocates the operator descriptor plus a zero-padding
// buffer filled with the input zero point (so padded pixels contribute the
// quantized value 0 to the average), and pre-computes the requantization
// parameters for the AVGPOOL/GAVGPOOL micro-kernels.
//
// Returns xnn_status_success on success and writes the operator to
// *average_pooling_op_out; on failure, returns an error status and releases any
// partially-constructed state via the error path.
enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), stride_width, stride_height);
    goto error;
  }

  // Strides may equal, but not exceed, the pooling window (no skipped pixels).
  if (stride_height > pooling_height) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride height: stride must not exceed pooling height %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), stride_height, pooling_height);
    goto error;
  }

  if (stride_width > pooling_width) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride width: stride must not exceed pooling width %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), stride_width, pooling_width);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), output_pixel_stride, channels);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), output_scale);
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), output_min, output_max);
    goto error;
  }

  // TF SAME padding is computed at setup time from the input size, so it is
  // mutually exclusive with explicit padding.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32 " padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_unsupported_parameter;

  // The fixed-point requantization scheme only supports scale ratios in [2**-8, 2**8).
  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale and %.7g output scale: "
      "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8),
      input_scale, output_scale, input_output_scale);
    goto error;
  }

  // The 32-bit accumulator can sum at most 2**24 uint8 values without overflow risk.
  if (pooling_size >= 16777216) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " (%" PRIu32 "x%" PRIu32 ") pooling elements: "
      "the number of elements in the pooling area must be below 2**24",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8),
      pooling_size, pooling_width, pooling_height);
    goto error;
  }

  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8));
    goto error;
  }

  // Zero-padding buffer: padded pixels must read as the input zero point so
  // they represent real value 0.0 in the quantized domain.
  const size_t zero_bytes = channels * sizeof(uint8_t) + XNN_EXTRA_BYTES;
  void* zero_buffer = xnn_allocate_simd_memory(zero_bytes);
  if (zero_buffer == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator zero padding",
      zero_bytes, xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8));
    goto error;
  }
  memset(zero_buffer, input_zero_point, channels * sizeof(uint8_t));
  average_pooling_op->zero_buffer = zero_buffer;

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  average_pooling_op->input_zero_point = (int32_t) (uint32_t) input_zero_point;
  average_pooling_op->input_scale = input_scale;
  average_pooling_op->output_scale = output_scale;

  // Number of rows read in the AVGPOOL micro-kernel: the primary tile plus
  // however many incremental tiles are needed to cover the pooling window.
  const size_t avgpool_nrows =
    round_up(doz(pooling_size, xnn_params.qu8.avgpool.primary_tile), xnn_params.qu8.avgpool.incremental_tile) + xnn_params.qu8.avgpool.primary_tile;
  const float requantization_scale = input_scale / (output_scale * (float) pooling_size);
  // The bias pre-subtracts the input zero point once per accumulated row.
  xnn_params.qu8.avgpool.init.qu8(&average_pooling_op->params.qu8_avgpool,
    (int32_t) -((uint32_t) input_zero_point * (uint32_t) avgpool_nrows),
    requantization_scale, output_zero_point, output_min, output_max);
  xnn_params.qu8.gavgpool.init.qu8(&average_pooling_op->params.qu8_gavgpool,
    0 /* bias */, requantization_scale, output_zero_point, output_min, output_max);

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_qu8;
  average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
  average_pooling_op->flags = flags;

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(average_pooling_op);
  return status;
}
244 
// Creates a 2D average pooling operator for NHWC layout with half-precision
// (IEEE FP16) data.
//
// Validates all parameters, allocates the operator descriptor plus an
// all-zeros padding buffer, and initializes the scale/min/max micro-kernel
// parameters. When any padding is in effect (explicit or TF SAME), the
// pixelwise-average-pooling kernel is selected, since the divisor then varies
// per output pixel.
//
// Returns xnn_status_success on success and writes the operator to
// *average_pooling_op_out; on failure, returns an error status and releases any
// partially-constructed state via the error path.
enum xnn_status xnn_create_average_pooling2d_nhwc_f16(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  status = xnn_status_unsupported_hardware;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_F16) != XNN_INIT_FLAG_F16) {
    xnn_log_error(
      "failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), stride_width, stride_height);
    goto error;
  }

  // Strides may equal, but not exceed, the pooling window (no skipped pixels).
  if (stride_height > pooling_height) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride height: stride must not exceed pooling height %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), stride_height, pooling_height);
    goto error;
  }

  if (stride_width > pooling_width) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride width: stride must not exceed pooling width %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), stride_width, pooling_width);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), output_pixel_stride, channels);
    goto error;
  }

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  // Validate the output range after rounding the bounds to FP16, since that is
  // the precision the micro-kernel actually clamps at.
  const uint16_t fp16_output_min = fp16_ieee_from_fp32_value(output_min);
  const uint16_t fp16_output_max = fp16_ieee_from_fp32_value(output_max);
  const float rounded_output_min = fp16_ieee_to_fp32_value(fp16_output_min);
  const float rounded_output_max = fp16_ieee_to_fp32_value(fp16_output_max);
  if (rounded_output_min >= rounded_output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), rounded_output_min, rounded_output_max);
    goto error;
  }

  // TF SAME padding is computed at setup time from the input size, so it is
  // mutually exclusive with explicit padding.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32 " padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  // All-zeros padding buffer: FP16 0.0 is an all-zero bit pattern.
  const size_t zero_bytes = channels * sizeof(uint16_t) + XNN_EXTRA_BYTES;
  void* zero_buffer = xnn_allocate_zero_simd_memory(zero_bytes);
  if (zero_buffer == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator zero padding",
      zero_bytes, xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }
  average_pooling_op->zero_buffer = zero_buffer;

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_f16;
  // Pre-compute 1/pooling_size as the averaging scale for the non-padded kernel.
  xnn_params.f16.avgpool.init.f16(&average_pooling_op->params.f16_scaleminmax,
    fp16_ieee_from_fp32_value(1.0f / (float) (int32_t) pooling_size), fp16_output_min, fp16_output_max);
  // With padding, border pixels average over fewer elements, so the divisor is
  // per-pixel and the pixelwise kernel must be used.
  const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
  if (any_padding || tf_same_padding) {
    xnn_params.f16.pavgpool.init.f16(&average_pooling_op->params.f16_minmax, fp16_output_min, fp16_output_max);
    average_pooling_op->ukernel.type = xnn_ukernel_type_pixelwise_average_pooling;
  } else {
    average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
  }
  average_pooling_op->flags = flags;

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(average_pooling_op);
  return status;
}
433 
// Creates a 2D average pooling operator for NHWC layout with single-precision
// (FP32) data.
//
// Validates all parameters, allocates the operator descriptor plus an
// all-zeros padding buffer, and initializes the scale/min/max micro-kernel
// parameters. When any padding is in effect (explicit or TF SAME), the
// pixelwise-average-pooling kernel is selected, since the divisor then varies
// per output pixel.
//
// Returns xnn_status_success on success and writes the operator to
// *average_pooling_op_out; on failure, returns an error status and releases any
// partially-constructed state via the error path.
enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), stride_width, stride_height);
    goto error;
  }

  // Strides may equal, but not exceed, the pooling window (no skipped pixels).
  if (stride_height > pooling_height) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride height: stride must not exceed pooling height %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), stride_height, pooling_height);
    goto error;
  }

  if (stride_width > pooling_width) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride width: stride must not exceed pooling width %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), stride_width, pooling_width);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), output_pixel_stride, channels);
    goto error;
  }

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), output_min, output_max);
    goto error;
  }

  // TF SAME padding is computed at setup time from the input size, so it is
  // mutually exclusive with explicit padding.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32 " padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  // All-zeros padding buffer: FP32 0.0f is an all-zero bit pattern.
  const size_t zero_bytes = channels * sizeof(float) + XNN_EXTRA_BYTES;
  void* zero_buffer = xnn_allocate_zero_simd_memory(zero_bytes);
  if (zero_buffer == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator zero padding",
      zero_bytes, xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }
  average_pooling_op->zero_buffer = zero_buffer;

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_f32;
  // Pre-compute 1/pooling_size as the averaging scale for the non-padded kernel.
  xnn_params.f32.avgpool.init.f32(&average_pooling_op->params.f32_scaleminmax,
    1.0f / (float) (int32_t) pooling_size, output_min, output_max);
  // With padding, border pixels average over fewer elements, so the divisor is
  // per-pixel and the pixelwise kernel must be used.
  const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
  if (any_padding || tf_same_padding) {
    xnn_params.f32.pavgpool.init.f32(&average_pooling_op->params.f32_minmax, output_min, output_max);
    average_pooling_op->ukernel.type = xnn_ukernel_type_pixelwise_average_pooling;
  } else {
    average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
  }
  average_pooling_op->flags = flags;

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(average_pooling_op);
  return status;
}
609 
setup_average_pooling2d(xnn_operator_t average_pooling_op,size_t batch_size,size_t input_height,size_t input_width,const void * input,void * output,uint32_t log2_data_element_size,uint32_t log2_weight_element_size,xnn_indirection_init_pavgpool2d_fn indirection_init_pavgpool2d,struct avgpool_parameters avgpool[restrict XNN_MIN_ELEMENTS (1)],struct pavgpool_parameters pavgpool[restrict1],struct gavgpool_parameters gavgpool[restrict XNN_MIN_ELEMENTS (1)],const void * params,size_t params_size,const void * global_params,size_t global_params_size,size_t num_threads,bool is_pixelwise)610 static enum xnn_status setup_average_pooling2d(
611   xnn_operator_t average_pooling_op,
612   size_t batch_size,
613   size_t input_height,
614   size_t input_width,
615   const void* input,
616   void* output,
617   uint32_t log2_data_element_size,
618   uint32_t log2_weight_element_size,
619   xnn_indirection_init_pavgpool2d_fn indirection_init_pavgpool2d,
620   struct avgpool_parameters avgpool[restrict XNN_MIN_ELEMENTS(1)],
621   struct pavgpool_parameters pavgpool[restrict 1],
622   struct gavgpool_parameters gavgpool[restrict XNN_MIN_ELEMENTS(1)],
623   const void* params,
624   size_t params_size,
625   const void* global_params,
626   size_t global_params_size,
627   size_t num_threads,
628   bool is_pixelwise)
629 {
630   assert(!is_pixelwise || pavgpool != NULL && indirection_init_pavgpool2d != NULL);
631 
632   average_pooling_op->state = xnn_run_state_invalid;
633 
634   if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
635     xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
636       xnn_operator_type_to_string(average_pooling_op->type));
637     return xnn_status_uninitialized;
638   }
639 
640   if (input_width == 0 || input_height == 0) {
641     xnn_log_error(
642       "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
643       xnn_operator_type_to_string(average_pooling_op->type), input_width, input_height);
644     return xnn_status_invalid_parameter;
645   }
646 
647   if (batch_size == 0) {
648     average_pooling_op->state = xnn_run_state_skip;
649     return xnn_status_success;
650   }
651 
652   average_pooling_op->input_height = input_height;
653   average_pooling_op->input_width = input_width;
654   average_pooling_op->input = input;
655 
656   const bool tf_same_padding = (average_pooling_op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
657   if (tf_same_padding) {
658     average_pooling_op->output_height = compute_output_dimension_with_tf_same_padding(
659         input_height, average_pooling_op->stride_height);
660     average_pooling_op->output_width = compute_output_dimension_with_tf_same_padding(
661         input_width, average_pooling_op->stride_width);
662 
663     const uint32_t kernel_height = average_pooling_op->kernel_height;
664     const uint32_t kernel_width = average_pooling_op->kernel_width;
665     const uint32_t total_padding_height =
666       (average_pooling_op->output_height - 1) * average_pooling_op->stride_height + kernel_height - input_height;
667     const uint32_t total_padding_width =
668       (average_pooling_op->output_width - 1) * average_pooling_op->stride_width + kernel_width - input_width;
669     average_pooling_op->padding_top = total_padding_height / 2;
670     average_pooling_op->padding_left = total_padding_width / 2;
671     average_pooling_op->padding_bottom = total_padding_height - average_pooling_op->padding_top;
672     average_pooling_op->padding_right = total_padding_width - average_pooling_op->padding_left;
673   } else {
674     average_pooling_op->output_height = xnn_compute_convolution_output_dimension(
675         average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom,
676         average_pooling_op->kernel_height,
677         1,
678         average_pooling_op->stride_height);
679     average_pooling_op->output_width = xnn_compute_convolution_output_dimension(
680         average_pooling_op->padding_left + input_width + average_pooling_op->padding_right,
681         average_pooling_op->kernel_width,
682         1,
683         average_pooling_op->stride_width);
684   }
685   average_pooling_op->output = output;
686 
687   const size_t output_height = average_pooling_op->output_height;
688   const size_t output_width = average_pooling_op->output_width;
689   const size_t padded_input_width = average_pooling_op->padding_left + input_width + average_pooling_op->padding_right;
690   const size_t padded_input_height = average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom;
691   if (padded_input_width == average_pooling_op->kernel_width && padded_input_height == average_pooling_op->kernel_height) {
692     // Global average pooling
693     const size_t input_elements = input_height * input_width;
694     const size_t input_stride_in_bytes = average_pooling_op->input_pixel_stride << log2_data_element_size;
695     const size_t channels = average_pooling_op->channels;
696     average_pooling_op->context.global_average_pooling_nwc = (struct global_average_pooling_nwc_context) {
697         .input = input,
698         .zero = average_pooling_op->zero_buffer,
699         .input_pixel_stride = input_stride_in_bytes,
700         .input_batch_stride = input_stride_in_bytes * input_elements,
701         .input_elements = input_elements,
702         .channels = channels,
703         .output = output,
704         .output_batch_stride = average_pooling_op->output_pixel_stride << log2_data_element_size,
705     };
706     memcpy(&average_pooling_op->context.global_average_pooling_nwc.params, global_params, global_params_size);
707     average_pooling_op->compute.type = xnn_parallelization_type_1d;
708     average_pooling_op->compute.range[0] = batch_size;
709 
710     if (input_elements <= gavgpool->row_tile) {
711       average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_unipass;
712       average_pooling_op->context.global_average_pooling_nwc.unipass_ukernel = gavgpool->unipass;
713     } else {
714       average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_multipass;
715       average_pooling_op->context.global_average_pooling_nwc.multipass_ukernel = gavgpool->multipass;
716     }
717   } else {
718     // Non-global average pooling
719     const size_t pooling_height = average_pooling_op->kernel_height;
720     const size_t pooling_width = average_pooling_op->kernel_width;
721     const size_t pooling_size = pooling_height * pooling_width;
722 
723     const uint32_t primary_tile = is_pixelwise ? pavgpool->primary_tile : avgpool->primary_tile;
724 
725     const size_t step_width = min(average_pooling_op->stride_width, pooling_width);
726     const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;
727 
728     const size_t last_input_height = average_pooling_op->last_input_height;
729     const size_t last_input_width = average_pooling_op->last_input_width;
730     if (input_height != last_input_height || input_width != last_input_width) {
731       // Micro-kernel may read up to (primary_tile - 1) elements after the end of indirection buffer.
732       const size_t indirection_buffer_size = sizeof(void*) * ((primary_tile - 1) + output_height * step_height);
733 
734       const void** indirection_buffer =
735         (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
736       if (indirection_buffer == NULL) {
737         xnn_log_error("failed to allocate %zu bytes for %s operator indirection buffer",
738           indirection_buffer_size, xnn_operator_type_to_string(average_pooling_op->type));
739         return xnn_status_out_of_memory;
740       }
741       average_pooling_op->indirection_buffer = indirection_buffer;
742 
743       xnn_indirection_init_dwconv2d(average_pooling_op, step_height, step_width, primary_tile, log2_data_element_size);
744 
745       average_pooling_op->last_input = input;
746       average_pooling_op->last_input_height = input_height;
747       average_pooling_op->last_input_width = input_width;
748     }
749 
750     const size_t channels = average_pooling_op->channels;
751 
752     const size_t indirect_input_height_stride = step_height * sizeof(void*);
753     const size_t output_width_stride = average_pooling_op->output_pixel_stride << log2_data_element_size;
754     const size_t output_height_stride = output_width * output_width_stride;
755 
756     if (is_pixelwise) {
757       assert(indirection_init_pavgpool2d != NULL);
758 
759       if (input_height != last_input_height || input_width != last_input_width) {
760         const size_t pixelwise_buffer_size = (output_height * output_width) << log2_weight_element_size;
761         void* pixelwise_buffer = xnn_reallocate_memory(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
762         if (pixelwise_buffer == NULL) {
763           xnn_log_error("failed to allocate %zu bytes for %s operator pixelwise buffer",
764             pixelwise_buffer_size, xnn_operator_type_to_string(average_pooling_op->type));
765           return xnn_status_out_of_memory;
766         }
767         average_pooling_op->pixelwise_buffer = pixelwise_buffer;
768 
769         indirection_init_pavgpool2d(
770           input_height, input_width,
771           output_height, output_width,
772           average_pooling_op->kernel_height, average_pooling_op->kernel_width,
773           average_pooling_op->stride_height, average_pooling_op->stride_width,
774           average_pooling_op->padding_top, average_pooling_op->padding_left,
775           pixelwise_buffer);
776       }
777 
778       const uint32_t incremental_tile = pavgpool->incremental_tile;
779       const size_t multipass_adjustment =
780         pooling_size > primary_tile ? round_up(pooling_size - primary_tile, incremental_tile) + primary_tile - incremental_tile : 0;
781       average_pooling_op->context.pixelwise_average_pooling = (struct pixelwise_average_pooling_context) {
782         .indirect_input = average_pooling_op->indirection_buffer,
783         .indirect_input_height_stride = indirect_input_height_stride,
784         .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride << log2_data_element_size,
785         .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
786         .pixelwise_buffer = average_pooling_op->pixelwise_buffer,
787         .pixelwise_buffer_height_stride = output_width << log2_data_element_size,
788         .output = output,
789         .output_batch_stride = output_height * output_height_stride,
790         .output_height_stride = output_height_stride,
791         .output_width = output_width,
792         .pooling_size = pooling_size,
793         .channels = channels,
794         .zero = average_pooling_op->zero_buffer,
795         .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
796         .output_increment = output_width_stride - (channels << log2_data_element_size),
797       };
798       memcpy(&average_pooling_op->context.pixelwise_average_pooling.params, params, params_size);
799       if (pooling_size <= primary_tile) {
800         average_pooling_op->context.pixelwise_average_pooling.unipass_ukernel = pavgpool->unipass;
801         average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_unipass;
802       } else {
803         average_pooling_op->context.pixelwise_average_pooling.multipass_ukernel = pavgpool->multipass;
804         average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_multipass;
805       }
806     } else {
807       const uint32_t incremental_tile = avgpool->incremental_tile;
808       const size_t multipass_adjustment =
809         pooling_size > primary_tile ? round_up(pooling_size - primary_tile, incremental_tile) + primary_tile - incremental_tile : 0;
810       average_pooling_op->context.average_pooling = (struct average_pooling_context) {
811         .indirect_input = average_pooling_op->indirection_buffer,
812         .indirect_input_height_stride = indirect_input_height_stride,
813         .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
814         .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride << log2_data_element_size,
815         .output = output,
816         .output_batch_stride = output_height * output_height_stride,
817         .output_height_stride = output_height_stride,
818         .output_width = output_width,
819         .pooling_size = pooling_size,
820         .channels = channels,
821         .zero = average_pooling_op->zero_buffer,
822         .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
823         .output_increment = output_width_stride - (channels << log2_data_element_size),
824         .params.f32 = average_pooling_op->params.f32_scaleminmax,
825       };
826       memcpy(&average_pooling_op->context.average_pooling.params, params, params_size);
827       if (pooling_size <= primary_tile) {
828         average_pooling_op->context.average_pooling.unipass_ukernel = avgpool->unipass;
829         average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_unipass;
830       } else {
831         average_pooling_op->context.average_pooling.multipass_ukernel = avgpool->multipass;
832         average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_multipass;
833       }
834     }
835     average_pooling_op->compute.type = xnn_parallelization_type_2d;
836     average_pooling_op->compute.range[0] = batch_size;
837     average_pooling_op->compute.range[1] = output_height;
838   }
839   average_pooling_op->state = xnn_run_state_ready;
840 
841   return xnn_status_success;
842 }
843 
xnn_setup_average_pooling2d_nhwc_qu8(xnn_operator_t average_pooling_op,size_t batch_size,size_t input_height,size_t input_width,const uint8_t * input,uint8_t * output,pthreadpool_t threadpool)844 enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
845     xnn_operator_t average_pooling_op,
846     size_t batch_size,
847     size_t input_height,
848     size_t input_width,
849     const uint8_t* input,
850     uint8_t* output,
851     pthreadpool_t threadpool)
852 {
853   if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_qu8) {
854     xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
855       xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8),
856       xnn_operator_type_to_string(average_pooling_op->type));
857     return xnn_status_invalid_parameter;
858   }
859 
860   assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling);
861 
862   // Number of rows read in the GAVGPOOL micro-kernel.
863   const size_t input_size = input_height * input_width;
864   const size_t pooling_size = average_pooling_op->kernel_height * average_pooling_op->kernel_width;
865   const size_t gavgpool_nrows = round_up(input_size, xnn_params.qu8.gavgpool.row_tile);
866   xnn_params.qu8.gavgpool.update.qu8(
867     &average_pooling_op->params.qu8_gavgpool,
868     -(average_pooling_op->input_zero_point * (int32_t) gavgpool_nrows),
869     average_pooling_op->input_scale / (average_pooling_op->output_scale * (float) pooling_size));
870 
871   return setup_average_pooling2d(
872     average_pooling_op,
873     batch_size, input_height, input_width,
874     input, output,
875     0 /* log2(sizeof(data element)) = log2(sizeof(uint8_t)) */,
876     0 /* log2(sizeof(weight element)) = log2(sizeof(uint8_t)) */,
877     NULL /* indirection_init_pavgpool2d */,
878     &xnn_params.qu8.avgpool,
879     NULL /* no PAVGPOOL micro-kernel */,
880     &xnn_params.qu8.gavgpool,
881     &average_pooling_op->params.qu8_avgpool,
882     sizeof(average_pooling_op->params.qu8_avgpool),
883     &average_pooling_op->params.qu8_gavgpool,
884     sizeof(average_pooling_op->params.qu8_gavgpool),
885     pthreadpool_get_threads_count(threadpool),
886     false /* pixelwise not supported */);
887 }
888 
xnn_setup_average_pooling2d_nhwc_f16(xnn_operator_t average_pooling_op,size_t batch_size,size_t input_height,size_t input_width,const void * input,void * output,pthreadpool_t threadpool)889 enum xnn_status xnn_setup_average_pooling2d_nhwc_f16(
890     xnn_operator_t average_pooling_op,
891     size_t batch_size,
892     size_t input_height,
893     size_t input_width,
894     const void* input,
895     void* output,
896     pthreadpool_t threadpool)
897 {
898   if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_f16) {
899     xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
900       xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16),
901       xnn_operator_type_to_string(average_pooling_op->type));
902     return xnn_status_invalid_parameter;
903   }
904 
905   assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling ||
906          average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling);
907 
908   const void* pooling_params = &average_pooling_op->params.f16_scaleminmax;
909   size_t pooling_params_size = sizeof(average_pooling_op->params.f16_scaleminmax);
910   const bool is_pixelwise = average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling;
911   if (is_pixelwise) {
912     const size_t input_size = input_height * input_width;
913     xnn_params.f16.gavgpool.update.f16(&average_pooling_op->params.f16_scaleminmax, fp16_ieee_from_fp32_value(1.0f / (float) (int32_t) input_size));
914     pooling_params = &average_pooling_op->params.f16_minmax;
915     pooling_params_size = sizeof(average_pooling_op->params.f16_minmax);
916   }
917 
918   return setup_average_pooling2d(
919     average_pooling_op,
920     batch_size, input_height, input_width,
921     input, output,
922     1 /* log2(sizeof(data element)) = log2(sizeof(half)) */,
923     1 /* log2(sizeof(weight element)) = log2(sizeof(half)) */,
924     (xnn_indirection_init_pavgpool2d_fn) xnn_indirection_init_pavgpool2d_f16,
925     &xnn_params.f16.avgpool, &xnn_params.f16.pavgpool, &xnn_params.f16.gavgpool,
926     pooling_params, pooling_params_size,
927     &average_pooling_op->params.f16_scaleminmax, sizeof(average_pooling_op->params.f16_scaleminmax),
928     pthreadpool_get_threads_count(threadpool),
929     is_pixelwise);
930 }
931 
xnn_setup_average_pooling2d_nhwc_f32(xnn_operator_t average_pooling_op,size_t batch_size,size_t input_height,size_t input_width,const float * input,float * output,pthreadpool_t threadpool)932 enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
933     xnn_operator_t average_pooling_op,
934     size_t batch_size,
935     size_t input_height,
936     size_t input_width,
937     const float* input,
938     float* output,
939     pthreadpool_t threadpool)
940 {
941   if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_f32) {
942     xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
943       xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32),
944       xnn_operator_type_to_string(average_pooling_op->type));
945     return xnn_status_invalid_parameter;
946   }
947 
948   assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling ||
949          average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling);
950 
951   const void* pooling_params = &average_pooling_op->params.f32_scaleminmax;
952   size_t pooling_params_size = sizeof(average_pooling_op->params.f32_scaleminmax);
953   const bool is_pixelwise = average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling;
954   if (is_pixelwise) {
955     const size_t input_size = input_height * input_width;
956     xnn_params.f32.gavgpool.update.f32(&average_pooling_op->params.f32_scaleminmax, 1.0f / (float) (int32_t) input_size);
957     pooling_params = &average_pooling_op->params.f32_minmax;
958     pooling_params_size = sizeof(average_pooling_op->params.f32_minmax);
959   }
960 
961   return setup_average_pooling2d(
962     average_pooling_op,
963     batch_size, input_height, input_width,
964     input, output,
965     2 /* log2(sizeof(data element)) = log2(sizeof(float)) */,
966     2 /* log2(sizeof(weight element)) = log2(sizeof(float)) */,
967     (xnn_indirection_init_pavgpool2d_fn) xnn_indirection_init_pavgpool2d_f32,
968     &xnn_params.f32.avgpool, &xnn_params.f32.pavgpool, &xnn_params.f32.gavgpool,
969     pooling_params, pooling_params_size,
970     &average_pooling_op->params.f32_scaleminmax, sizeof(average_pooling_op->params.f32_scaleminmax),
971     pthreadpool_get_threads_count(threadpool),
972     is_pixelwise);
973 }
974