1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include <fp16.h>
18
19 #include <xnnpack.h>
20 #include <xnnpack/allocator.h>
21 #include <xnnpack/operator.h>
22 #include <xnnpack/common.h>
23 #include <xnnpack/log.h>
24 #include <xnnpack/math.h>
25 #include <xnnpack/microparams-init.h>
26 #include <xnnpack/params.h>
27 #include <xnnpack/indirection.h>
28
29
// Output size for TensorFlow SAME padding: ceil(input / stride).
static inline size_t compute_output_dimension_with_tf_same_padding(
    size_t input_dimension,
    size_t stride_dimension)
{
  const size_t output_dimension = divide_round_up(input_dimension, stride_dimension);
  return output_dimension;
}
36
// Creates a 2D average pooling operator for quantized unsigned 8-bit (QU8) data in
// NHWC layout. Validates pooling geometry, channel/stride parameters, and quantization
// parameters; allocates the operator descriptor and a zero-point-filled padding buffer;
// and pre-computes requantization parameters for both the AVGPOOL (general) and
// GAVGPOOL (global pooling) micro-kernel paths.
//
// Returns xnn_status_success and stores the operator in *average_pooling_op_out, or an
// error status (uninitialized / invalid_parameter / unsupported_parameter /
// out_of_memory) with any partially-built operator released.
enum xnn_status xnn_create_average_pooling2d_nhwc_qu8(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint8_t output_min,
  uint8_t output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), stride_width, stride_height);
    goto error;
  }

  // A stride larger than the pooling window would skip input rows/columns entirely.
  // Note: use the common goto-error path (status is already invalid_parameter) rather
  // than returning directly, so this function has a single exit convention on failure.
  if (stride_height > pooling_height) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride height: must not exceed pooling height %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), stride_height, pooling_height);
    goto error;
  }

  if (stride_width > pooling_width) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride width: must not exceed pooling width %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), stride_width, pooling_width);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), output_pixel_stride, channels);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), output_scale);
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8), output_min, output_max);
    goto error;
  }

  // TensorFlow SAME padding is computed dynamically at setup time from the input size,
  // so it is mutually exclusive with explicit padding.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_unsupported_parameter;

  // The fixed-point requantization scheme only supports input/output scale ratios in
  // [2**-8, 2**8).
  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale and %.7g output scale: "
      "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8),
      input_scale, output_scale, input_output_scale);
    goto error;
  }

  // The accumulator sums up to pooling_size 8-bit values; the element count must stay
  // below 2**24 to avoid overflowing the 32-bit fixed-point accumulation.
  if (pooling_size >= 16777216) {
    xnn_log_error(
      "failed to create %s operator with %"PRIu32" (%" PRIu32 "x%" PRIu32 ") pooling elements: "
      "the number of elements in the pooling area must be below 2**24",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8),
      pooling_size, pooling_width, pooling_height);
    goto error;
  }

  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8));
    goto error;
  }

  // Padding buffer filled with the input zero point, so that padded positions
  // contribute a quantized zero to the average.
  const size_t zero_bytes = channels * sizeof(uint8_t) + XNN_EXTRA_BYTES;
  void* zero_buffer = xnn_allocate_simd_memory(zero_bytes);
  if (zero_buffer == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator zero padding",
      zero_bytes, xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8));
    goto error;
  }
  memset(zero_buffer, input_zero_point, channels * sizeof(uint8_t));
  average_pooling_op->zero_buffer = zero_buffer;

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  average_pooling_op->input_zero_point = (int32_t) (uint32_t) input_zero_point;
  average_pooling_op->input_scale = input_scale;
  average_pooling_op->output_scale = output_scale;

  // Number of rows read in the AVGPOOL micro-kernel.
  const size_t avgpool_nrows =
    round_up(doz(pooling_size, xnn_params.qu8.avgpool.primary_tile), xnn_params.qu8.avgpool.incremental_tile) + xnn_params.qu8.avgpool.primary_tile;
  const float requantization_scale = input_scale / (output_scale * (float) pooling_size);
  // The bias pre-subtracts the zero-point contribution of all rows the micro-kernel
  // reads (padded rows read the zero buffer, which also holds the input zero point).
  xnn_params.qu8.avgpool.init.qu8(&average_pooling_op->params.qu8_avgpool,
    (int32_t) -((uint32_t) input_zero_point * (uint32_t) avgpool_nrows),
    requantization_scale, output_zero_point, output_min, output_max);
  xnn_params.qu8.gavgpool.init.qu8(&average_pooling_op->params.qu8_gavgpool,
    0 /* bias */, requantization_scale, output_zero_point, output_min, output_max);

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_qu8;
  average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
  average_pooling_op->flags = flags;

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(average_pooling_op);
  return status;
}
244
// Creates a 2D average pooling operator for half-precision (F16) data in NHWC layout.
// Validates pooling geometry, channel/stride parameters, and the output range
// (after rounding the bounds to FP16); allocates the operator descriptor and a
// zero-filled padding buffer; and selects between the AVGPOOL micro-kernel (no
// padding) and the pixelwise PAVGPOOL micro-kernel (explicit or TF-SAME padding,
// where each output pixel averages over a different element count).
//
// Returns xnn_status_success and stores the operator in *average_pooling_op_out, or an
// error status (uninitialized / unsupported_hardware / invalid_parameter /
// out_of_memory) with any partially-built operator released.
enum xnn_status xnn_create_average_pooling2d_nhwc_f16(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  status = xnn_status_unsupported_hardware;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_F16) != XNN_INIT_FLAG_F16) {
    xnn_log_error(
      "failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), stride_width, stride_height);
    goto error;
  }

  // A stride larger than the pooling window would skip input rows/columns entirely.
  // Note: use the common goto-error path (status is already invalid_parameter) rather
  // than returning directly, so this function has a single exit convention on failure.
  if (stride_height > pooling_height) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride height: must not exceed pooling height %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), stride_height, pooling_height);
    goto error;
  }

  if (stride_width > pooling_width) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride width: must not exceed pooling width %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), stride_width, pooling_width);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), output_pixel_stride, channels);
    goto error;
  }

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  // The clamping bounds are applied in FP16, so validate the range after rounding the
  // FP32 bounds to FP16 — distinct FP32 bounds may round to an empty FP16 range.
  const uint16_t fp16_output_min = fp16_ieee_from_fp32_value(output_min);
  const uint16_t fp16_output_max = fp16_ieee_from_fp32_value(output_max);
  const float rounded_output_min = fp16_ieee_to_fp32_value(fp16_output_min);
  const float rounded_output_max = fp16_ieee_to_fp32_value(fp16_output_max);
  if (rounded_output_min >= rounded_output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16), rounded_output_min, rounded_output_max);
    goto error;
  }

  // TensorFlow SAME padding is computed dynamically at setup time from the input size,
  // so it is mutually exclusive with explicit padding.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }

  // Zero-filled padding buffer: padded positions contribute 0.0 (FP16 bit pattern 0)
  // to the average.
  const size_t zero_bytes = channels * sizeof(uint16_t) + XNN_EXTRA_BYTES;
  void* zero_buffer = xnn_allocate_zero_simd_memory(zero_bytes);
  if (zero_buffer == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator zero padding",
      zero_bytes, xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16));
    goto error;
  }
  average_pooling_op->zero_buffer = zero_buffer;

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_f16;
  xnn_params.f16.avgpool.init.f16(&average_pooling_op->params.f16_scaleminmax,
    fp16_ieee_from_fp32_value(1.0f / (float) (int32_t) pooling_size), fp16_output_min, fp16_output_max);
  // With any padding, per-pixel divisor counts differ, so use the pixelwise kernel.
  const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
  if (any_padding || tf_same_padding) {
    xnn_params.f16.pavgpool.init.f16(&average_pooling_op->params.f16_minmax, fp16_output_min, fp16_output_max);
    average_pooling_op->ukernel.type = xnn_ukernel_type_pixelwise_average_pooling;
  } else {
    average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
  }
  average_pooling_op->flags = flags;

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(average_pooling_op);
  return status;
}
433
// Creates a 2D average pooling operator for single-precision (F32) data in NHWC
// layout. Validates pooling geometry, channel/stride parameters, and the output
// range; allocates the operator descriptor and a zero-filled padding buffer; and
// selects between the AVGPOOL micro-kernel (no padding) and the pixelwise PAVGPOOL
// micro-kernel (explicit or TF-SAME padding, where each output pixel averages over a
// different element count).
//
// Returns xnn_status_success and stores the operator in *average_pooling_op_out, or an
// error status (uninitialized / invalid_parameter / out_of_memory) with any
// partially-built operator released.
enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
  uint32_t input_padding_top,
  uint32_t input_padding_right,
  uint32_t input_padding_bottom,
  uint32_t input_padding_left,
  uint32_t pooling_height,
  uint32_t pooling_width,
  uint32_t stride_height,
  uint32_t stride_width,
  size_t channels,
  size_t input_pixel_stride,
  size_t output_pixel_stride,
  float output_min,
  float output_max,
  uint32_t flags,
  xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), stride_width, stride_height);
    goto error;
  }

  // A stride larger than the pooling window would skip input rows/columns entirely.
  // Note: use the common goto-error path (status is already invalid_parameter) rather
  // than returning directly, so this function has a single exit convention on failure.
  if (stride_height > pooling_height) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride height: must not exceed pooling height %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), stride_height, pooling_height);
    goto error;
  }

  if (stride_width > pooling_width) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 " stride width: must not exceed pooling width %" PRIu32,
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), stride_width, pooling_width);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), output_pixel_stride, channels);
    goto error;
  }

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32), output_min, output_max);
    goto error;
  }

  // TensorFlow SAME padding is computed dynamically at setup time from the input size,
  // so it is mutually exclusive with explicit padding.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }

  // Zero-filled padding buffer: padded positions contribute 0.0f to the average.
  const size_t zero_bytes = channels * sizeof(float) + XNN_EXTRA_BYTES;
  void* zero_buffer = xnn_allocate_zero_simd_memory(zero_bytes);
  if (zero_buffer == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator zero padding",
      zero_bytes, xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32));
    goto error;
  }
  average_pooling_op->zero_buffer = zero_buffer;

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_f32;
  xnn_params.f32.avgpool.init.f32(&average_pooling_op->params.f32_scaleminmax,
    1.0f / (float) (int32_t) pooling_size, output_min, output_max);
  // With any padding, per-pixel divisor counts differ, so use the pixelwise kernel.
  const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
  if (any_padding || tf_same_padding) {
    xnn_params.f32.pavgpool.init.f32(&average_pooling_op->params.f32_minmax, output_min, output_max);
    average_pooling_op->ukernel.type = xnn_ukernel_type_pixelwise_average_pooling;
  } else {
    average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
  }
  average_pooling_op->flags = flags;

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(average_pooling_op);
  return status;
}
609
setup_average_pooling2d(xnn_operator_t average_pooling_op,size_t batch_size,size_t input_height,size_t input_width,const void * input,void * output,uint32_t log2_data_element_size,uint32_t log2_weight_element_size,xnn_indirection_init_pavgpool2d_fn indirection_init_pavgpool2d,struct avgpool_parameters avgpool[restrict XNN_MIN_ELEMENTS (1)],struct pavgpool_parameters pavgpool[restrict1],struct gavgpool_parameters gavgpool[restrict XNN_MIN_ELEMENTS (1)],const void * params,size_t params_size,const void * global_params,size_t global_params_size,size_t num_threads,bool is_pixelwise)610 static enum xnn_status setup_average_pooling2d(
611 xnn_operator_t average_pooling_op,
612 size_t batch_size,
613 size_t input_height,
614 size_t input_width,
615 const void* input,
616 void* output,
617 uint32_t log2_data_element_size,
618 uint32_t log2_weight_element_size,
619 xnn_indirection_init_pavgpool2d_fn indirection_init_pavgpool2d,
620 struct avgpool_parameters avgpool[restrict XNN_MIN_ELEMENTS(1)],
621 struct pavgpool_parameters pavgpool[restrict 1],
622 struct gavgpool_parameters gavgpool[restrict XNN_MIN_ELEMENTS(1)],
623 const void* params,
624 size_t params_size,
625 const void* global_params,
626 size_t global_params_size,
627 size_t num_threads,
628 bool is_pixelwise)
629 {
630 assert(!is_pixelwise || pavgpool != NULL && indirection_init_pavgpool2d != NULL);
631
632 average_pooling_op->state = xnn_run_state_invalid;
633
634 if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
635 xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
636 xnn_operator_type_to_string(average_pooling_op->type));
637 return xnn_status_uninitialized;
638 }
639
640 if (input_width == 0 || input_height == 0) {
641 xnn_log_error(
642 "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
643 xnn_operator_type_to_string(average_pooling_op->type), input_width, input_height);
644 return xnn_status_invalid_parameter;
645 }
646
647 if (batch_size == 0) {
648 average_pooling_op->state = xnn_run_state_skip;
649 return xnn_status_success;
650 }
651
652 average_pooling_op->input_height = input_height;
653 average_pooling_op->input_width = input_width;
654 average_pooling_op->input = input;
655
656 const bool tf_same_padding = (average_pooling_op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
657 if (tf_same_padding) {
658 average_pooling_op->output_height = compute_output_dimension_with_tf_same_padding(
659 input_height, average_pooling_op->stride_height);
660 average_pooling_op->output_width = compute_output_dimension_with_tf_same_padding(
661 input_width, average_pooling_op->stride_width);
662
663 const uint32_t kernel_height = average_pooling_op->kernel_height;
664 const uint32_t kernel_width = average_pooling_op->kernel_width;
665 const uint32_t total_padding_height =
666 (average_pooling_op->output_height - 1) * average_pooling_op->stride_height + kernel_height - input_height;
667 const uint32_t total_padding_width =
668 (average_pooling_op->output_width - 1) * average_pooling_op->stride_width + kernel_width - input_width;
669 average_pooling_op->padding_top = total_padding_height / 2;
670 average_pooling_op->padding_left = total_padding_width / 2;
671 average_pooling_op->padding_bottom = total_padding_height - average_pooling_op->padding_top;
672 average_pooling_op->padding_right = total_padding_width - average_pooling_op->padding_left;
673 } else {
674 average_pooling_op->output_height = xnn_compute_convolution_output_dimension(
675 average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom,
676 average_pooling_op->kernel_height,
677 1,
678 average_pooling_op->stride_height);
679 average_pooling_op->output_width = xnn_compute_convolution_output_dimension(
680 average_pooling_op->padding_left + input_width + average_pooling_op->padding_right,
681 average_pooling_op->kernel_width,
682 1,
683 average_pooling_op->stride_width);
684 }
685 average_pooling_op->output = output;
686
687 const size_t output_height = average_pooling_op->output_height;
688 const size_t output_width = average_pooling_op->output_width;
689 const size_t padded_input_width = average_pooling_op->padding_left + input_width + average_pooling_op->padding_right;
690 const size_t padded_input_height = average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom;
691 if (padded_input_width == average_pooling_op->kernel_width && padded_input_height == average_pooling_op->kernel_height) {
692 // Global average pooling
693 const size_t input_elements = input_height * input_width;
694 const size_t input_stride_in_bytes = average_pooling_op->input_pixel_stride << log2_data_element_size;
695 const size_t channels = average_pooling_op->channels;
696 average_pooling_op->context.global_average_pooling_nwc = (struct global_average_pooling_nwc_context) {
697 .input = input,
698 .zero = average_pooling_op->zero_buffer,
699 .input_pixel_stride = input_stride_in_bytes,
700 .input_batch_stride = input_stride_in_bytes * input_elements,
701 .input_elements = input_elements,
702 .channels = channels,
703 .output = output,
704 .output_batch_stride = average_pooling_op->output_pixel_stride << log2_data_element_size,
705 };
706 memcpy(&average_pooling_op->context.global_average_pooling_nwc.params, global_params, global_params_size);
707 average_pooling_op->compute.type = xnn_parallelization_type_1d;
708 average_pooling_op->compute.range[0] = batch_size;
709
710 if (input_elements <= gavgpool->row_tile) {
711 average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_unipass;
712 average_pooling_op->context.global_average_pooling_nwc.unipass_ukernel = gavgpool->unipass;
713 } else {
714 average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_multipass;
715 average_pooling_op->context.global_average_pooling_nwc.multipass_ukernel = gavgpool->multipass;
716 }
717 } else {
718 // Non-global average pooling
719 const size_t pooling_height = average_pooling_op->kernel_height;
720 const size_t pooling_width = average_pooling_op->kernel_width;
721 const size_t pooling_size = pooling_height * pooling_width;
722
723 const uint32_t primary_tile = is_pixelwise ? pavgpool->primary_tile : avgpool->primary_tile;
724
725 const size_t step_width = min(average_pooling_op->stride_width, pooling_width);
726 const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;
727
728 const size_t last_input_height = average_pooling_op->last_input_height;
729 const size_t last_input_width = average_pooling_op->last_input_width;
730 if (input_height != last_input_height || input_width != last_input_width) {
731 // Micro-kernel may read up to (primary_tile - 1) elements after the end of indirection buffer.
732 const size_t indirection_buffer_size = sizeof(void*) * ((primary_tile - 1) + output_height * step_height);
733
734 const void** indirection_buffer =
735 (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
736 if (indirection_buffer == NULL) {
737 xnn_log_error("failed to allocate %zu bytes for %s operator indirection buffer",
738 indirection_buffer_size, xnn_operator_type_to_string(average_pooling_op->type));
739 return xnn_status_out_of_memory;
740 }
741 average_pooling_op->indirection_buffer = indirection_buffer;
742
743 xnn_indirection_init_dwconv2d(average_pooling_op, step_height, step_width, primary_tile, log2_data_element_size);
744
745 average_pooling_op->last_input = input;
746 average_pooling_op->last_input_height = input_height;
747 average_pooling_op->last_input_width = input_width;
748 }
749
750 const size_t channels = average_pooling_op->channels;
751
752 const size_t indirect_input_height_stride = step_height * sizeof(void*);
753 const size_t output_width_stride = average_pooling_op->output_pixel_stride << log2_data_element_size;
754 const size_t output_height_stride = output_width * output_width_stride;
755
756 if (is_pixelwise) {
757 assert(indirection_init_pavgpool2d != NULL);
758
759 if (input_height != last_input_height || input_width != last_input_width) {
760 const size_t pixelwise_buffer_size = (output_height * output_width) << log2_weight_element_size;
761 void* pixelwise_buffer = xnn_reallocate_memory(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
762 if (pixelwise_buffer == NULL) {
763 xnn_log_error("failed to allocate %zu bytes for %s operator pixelwise buffer",
764 pixelwise_buffer_size, xnn_operator_type_to_string(average_pooling_op->type));
765 return xnn_status_out_of_memory;
766 }
767 average_pooling_op->pixelwise_buffer = pixelwise_buffer;
768
769 indirection_init_pavgpool2d(
770 input_height, input_width,
771 output_height, output_width,
772 average_pooling_op->kernel_height, average_pooling_op->kernel_width,
773 average_pooling_op->stride_height, average_pooling_op->stride_width,
774 average_pooling_op->padding_top, average_pooling_op->padding_left,
775 pixelwise_buffer);
776 }
777
778 const uint32_t incremental_tile = pavgpool->incremental_tile;
779 const size_t multipass_adjustment =
780 pooling_size > primary_tile ? round_up(pooling_size - primary_tile, incremental_tile) + primary_tile - incremental_tile : 0;
781 average_pooling_op->context.pixelwise_average_pooling = (struct pixelwise_average_pooling_context) {
782 .indirect_input = average_pooling_op->indirection_buffer,
783 .indirect_input_height_stride = indirect_input_height_stride,
784 .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride << log2_data_element_size,
785 .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
786 .pixelwise_buffer = average_pooling_op->pixelwise_buffer,
787 .pixelwise_buffer_height_stride = output_width << log2_data_element_size,
788 .output = output,
789 .output_batch_stride = output_height * output_height_stride,
790 .output_height_stride = output_height_stride,
791 .output_width = output_width,
792 .pooling_size = pooling_size,
793 .channels = channels,
794 .zero = average_pooling_op->zero_buffer,
795 .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
796 .output_increment = output_width_stride - (channels << log2_data_element_size),
797 };
798 memcpy(&average_pooling_op->context.pixelwise_average_pooling.params, params, params_size);
799 if (pooling_size <= primary_tile) {
800 average_pooling_op->context.pixelwise_average_pooling.unipass_ukernel = pavgpool->unipass;
801 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_unipass;
802 } else {
803 average_pooling_op->context.pixelwise_average_pooling.multipass_ukernel = pavgpool->multipass;
804 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_multipass;
805 }
806 } else {
807 const uint32_t incremental_tile = avgpool->incremental_tile;
808 const size_t multipass_adjustment =
809 pooling_size > primary_tile ? round_up(pooling_size - primary_tile, incremental_tile) + primary_tile - incremental_tile : 0;
810 average_pooling_op->context.average_pooling = (struct average_pooling_context) {
811 .indirect_input = average_pooling_op->indirection_buffer,
812 .indirect_input_height_stride = indirect_input_height_stride,
813 .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) average_pooling_op->last_input),
814 .input_batch_stride = input_height * input_width * average_pooling_op->input_pixel_stride << log2_data_element_size,
815 .output = output,
816 .output_batch_stride = output_height * output_height_stride,
817 .output_height_stride = output_height_stride,
818 .output_width = output_width,
819 .pooling_size = pooling_size,
820 .channels = channels,
821 .zero = average_pooling_op->zero_buffer,
822 .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
823 .output_increment = output_width_stride - (channels << log2_data_element_size),
824 .params.f32 = average_pooling_op->params.f32_scaleminmax,
825 };
826 memcpy(&average_pooling_op->context.average_pooling.params, params, params_size);
827 if (pooling_size <= primary_tile) {
828 average_pooling_op->context.average_pooling.unipass_ukernel = avgpool->unipass;
829 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_unipass;
830 } else {
831 average_pooling_op->context.average_pooling.multipass_ukernel = avgpool->multipass;
832 average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_multipass;
833 }
834 }
835 average_pooling_op->compute.type = xnn_parallelization_type_2d;
836 average_pooling_op->compute.range[0] = batch_size;
837 average_pooling_op->compute.range[1] = output_height;
838 }
839 average_pooling_op->state = xnn_run_state_ready;
840
841 return xnn_status_success;
842 }
843
enum xnn_status xnn_setup_average_pooling2d_nhwc_qu8(
    xnn_operator_t average_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  // Setup must be called on an operator created as QU8 average pooling.
  if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_qu8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_qu8),
      xnn_operator_type_to_string(average_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling);

  // Refresh the GAVGPOOL quantization parameters for this input size:
  // the micro-kernel consumes rows in multiples of row_tile, so the bias
  // covers the input zero point over the rounded-up row count, and the
  // scale folds in the averaging divisor (kernel_height * kernel_width).
  const size_t num_input_pixels = input_height * input_width;
  const size_t kernel_size = average_pooling_op->kernel_height * average_pooling_op->kernel_width;
  const size_t num_processed_rows = round_up(num_input_pixels, xnn_params.qu8.gavgpool.row_tile);
  xnn_params.qu8.gavgpool.update.qu8(
    &average_pooling_op->params.qu8_gavgpool,
    -(average_pooling_op->input_zero_point * (int32_t) num_processed_rows),
    average_pooling_op->input_scale / (average_pooling_op->output_scale * (float) kernel_size));

  return setup_average_pooling2d(
    average_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    0 /* log2(sizeof(uint8_t)) data element */,
    0 /* log2(sizeof(uint8_t)) weight element */,
    NULL /* indirection_init_pavgpool2d */,
    &xnn_params.qu8.avgpool,
    NULL /* no PAVGPOOL micro-kernel */,
    &xnn_params.qu8.gavgpool,
    &average_pooling_op->params.qu8_avgpool,
    sizeof(average_pooling_op->params.qu8_avgpool),
    &average_pooling_op->params.qu8_gavgpool,
    sizeof(average_pooling_op->params.qu8_gavgpool),
    pthreadpool_get_threads_count(threadpool),
    false /* pixelwise not supported */);
}
888
enum xnn_status xnn_setup_average_pooling2d_nhwc_f16(
    xnn_operator_t average_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  // Setup must be called on an operator created as F16 average pooling.
  if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_f16) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f16),
      xnn_operator_type_to_string(average_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling ||
         average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling);

  const bool is_pixelwise =
    average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling;

  // Pixelwise kernels take min/max-only parameters; in that case the
  // scale-min-max parameters are still refreshed with 1/input_size, which
  // feeds the GAVGPOOL (global) path selected inside setup_average_pooling2d.
  const void* pooling_params;
  size_t pooling_params_size;
  if (is_pixelwise) {
    const size_t input_size = input_height * input_width;
    xnn_params.f16.gavgpool.update.f16(
      &average_pooling_op->params.f16_scaleminmax,
      fp16_ieee_from_fp32_value(1.0f / (float) (int32_t) input_size));
    pooling_params = &average_pooling_op->params.f16_minmax;
    pooling_params_size = sizeof(average_pooling_op->params.f16_minmax);
  } else {
    pooling_params = &average_pooling_op->params.f16_scaleminmax;
    pooling_params_size = sizeof(average_pooling_op->params.f16_scaleminmax);
  }

  return setup_average_pooling2d(
    average_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    1 /* log2(sizeof(half)) data element */,
    1 /* log2(sizeof(half)) weight element */,
    (xnn_indirection_init_pavgpool2d_fn) xnn_indirection_init_pavgpool2d_f16,
    &xnn_params.f16.avgpool, &xnn_params.f16.pavgpool, &xnn_params.f16.gavgpool,
    pooling_params, pooling_params_size,
    &average_pooling_op->params.f16_scaleminmax, sizeof(average_pooling_op->params.f16_scaleminmax),
    pthreadpool_get_threads_count(threadpool),
    is_pixelwise);
}
931
enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
    xnn_operator_t average_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  // Setup must be called on an operator created as F32 average pooling.
  if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_f32) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_average_pooling_nhwc_f32),
      xnn_operator_type_to_string(average_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  assert(average_pooling_op->ukernel.type == xnn_ukernel_type_average_pooling ||
         average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling);

  const bool is_pixelwise =
    average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling;

  // Pixelwise kernels take min/max-only parameters; in that case the
  // scale-min-max parameters are still refreshed with 1/input_size, which
  // feeds the GAVGPOOL (global) path selected inside setup_average_pooling2d.
  const void* pooling_params;
  size_t pooling_params_size;
  if (is_pixelwise) {
    const size_t input_size = input_height * input_width;
    xnn_params.f32.gavgpool.update.f32(
      &average_pooling_op->params.f32_scaleminmax,
      1.0f / (float) (int32_t) input_size);
    pooling_params = &average_pooling_op->params.f32_minmax;
    pooling_params_size = sizeof(average_pooling_op->params.f32_minmax);
  } else {
    pooling_params = &average_pooling_op->params.f32_scaleminmax;
    pooling_params_size = sizeof(average_pooling_op->params.f32_scaleminmax);
  }

  return setup_average_pooling2d(
    average_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    2 /* log2(sizeof(float)) data element */,
    2 /* log2(sizeof(float)) weight element */,
    (xnn_indirection_init_pavgpool2d_fn) xnn_indirection_init_pavgpool2d_f32,
    &xnn_params.f32.avgpool, &xnn_params.f32.pavgpool, &xnn_params.f32.gavgpool,
    pooling_params, pooling_params_size,
    &average_pooling_op->params.f32_scaleminmax, sizeof(average_pooling_op->params.f32_scaleminmax),
    pthreadpool_get_threads_count(threadpool),
    is_pixelwise);
}
974