1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include <xnnpack.h>
18 #include <xnnpack/allocator.h>
19 #include <xnnpack/operator.h>
20 #include <xnnpack/common.h>
21 #include <xnnpack/log.h>
22 #include <xnnpack/math.h>
23 #include <xnnpack/params-init.h>
24 #include <xnnpack/params.h>
25 #include <xnnpack/indirection.h>
26
27
// Number of pooling window positions along one dimension: the count of whole
// strides that fit between the first and the last window start, plus one for
// the first window itself. All arithmetic is done in size_t.
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t pooling_dimension,
    size_t stride_dimension)
{
  const size_t last_window_start = padded_input_dimension - pooling_dimension;
  const size_t whole_strides = last_window_start / stride_dimension;
  return whole_strides + 1;
}
35
// Creates a quantized (Q8) 2D average pooling operator for NHWC data.
//
// Validates the arguments, allocates a zero-initialized operator descriptor
// (plus, when needed, a row of `channels` bytes filled with input_zero_point),
// and records the pooling geometry and Q8 average pooling parameters.
//
// Returns:
//   xnn_status_success               - *average_pooling_op_out holds the new operator.
//   xnn_status_uninitialized         - xnn_params is not initialized.
//   xnn_status_invalid_parameter     - a pooling or stride dimension is zero, the window is 1x1,
//                                      channels is zero, a pixel stride is below the channel count,
//                                      a scale is not positive and normal, or output_min >= output_max.
//   xnn_status_unsupported_parameter - input-to-output scale ratio outside [2**-8, 2**8), or
//                                      2**24 or more elements in the pooling window.
//   xnn_status_out_of_memory         - an allocation failed.
//
// On failure *average_pooling_op_out is not written. The flags argument is not
// referenced by this function.
enum xnn_status xnn_create_average_pooling2d_nhwc_q8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Average Pooling operator: XNNPACK is not initialized");
    goto error;
  }

  status = xnn_status_invalid_parameter;

  // Test each dimension on its own: a 32-bit product of two non-zero
  // dimensions can wrap around to zero and be misreported here.
  if (pooling_height == 0 || pooling_width == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      pooling_width, pooling_height);
    goto error;
  }

  // Widen before multiplying so that an oversized window cannot wrap around
  // and slip past the 2**24 limit checked further below.
  const uint64_t pooling_elements = (uint64_t) pooling_height * (uint64_t) pooling_width;

  if (pooling_elements == 1) {
    xnn_log_error(
      "failed to create Average Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
      "stride dimensions must be non-zero",
      stride_width, stride_height);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %zu channels: number of channels must be non-zero",
      channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Average Pooling operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Average Pooling operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    goto error;
  }

  // isnormal() rejects zero, subnormal, infinite and NaN values.
  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create Average Pooling operator with %.7g input scale: "
      "scale must be finite, normalized, and positive",
      input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create Average Pooling operator with %.7g output scale: "
      "scale must be finite, normalized, and positive",
      output_scale);
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Average Pooling operator with [%" PRIu8 ", %" PRIu8 "] output range: "
      "range min must be below range max",
      output_min, output_max);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
    xnn_log_error(
      "failed to create Average Pooling operator with %.7g input scale and %.7g output scale: "
      "input-to-output scale ratio (%.7f) must be in [2**-8, 2**8) range",
      input_scale, output_scale, input_output_scale);
    goto error;
  }

  if (pooling_elements >= 16777216) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu64 " (%" PRIu32 "x%" PRIu32 ") pooling elements: "
      "the number of elements in the pooling area must be below 2**24",
      pooling_elements, pooling_width, pooling_height);
    goto error;
  }

  // The element count is below 2**24 here, so narrowing is lossless.
  const uint32_t pooling_size = (uint32_t) pooling_elements;

  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  const uint32_t mr = xnn_params.q8.avgpool.mr;
  const uint32_t qr = xnn_params.q8.avgpool.qr;
  // A padding row is needed when there is explicit padding, or when the window
  // size is not exactly mr plus a whole multiple of qr.
  if (any_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
    void* zero_buffer = xnn_allocate_simd_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
    if (zero_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
        channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
      goto error;
    }
    // Only the first `channels` bytes are filled; the XNN_EXTRA_BYTES tail is left as allocated.
    memset(zero_buffer, input_zero_point, channels * sizeof(uint8_t));
    average_pooling_op->zero_buffer = zero_buffer;
  }

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  // Number of rows read in the micro-kernel.
  const size_t nrows = round_up(doz(pooling_size, mr), qr) + mr;
  // The bias is -(input_zero_point * nrows); the scale folds the division by
  // the window size into the input-to-output requantization scale.
  average_pooling_op->q8_avgpool_params =
    xnn_init_q8_avgpool_params(
      (int32_t) -((uint32_t) input_zero_point * (uint32_t) nrows),
      input_scale / (output_scale * (float) pooling_size),
      output_zero_point, output_min, output_max);

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_q8;
  average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  // average_pooling_op is NULL when the failure happened before the descriptor was allocated.
  xnn_delete_operator(average_pooling_op);
  return status;
}
211
// Creates a single-precision (F32) 2D average pooling operator for NHWC data.
//
// Validates the arguments, allocates a zero-initialized operator descriptor
// (plus, when needed, a zero-filled row of `channels` floats), and records the
// pooling geometry. With any non-zero padding the operator is tagged as
// pixelwise average pooling; otherwise as plain average pooling.
//
// Returns:
//   xnn_status_success               - *average_pooling_op_out holds the new operator.
//   xnn_status_uninitialized         - xnn_params is not initialized.
//   xnn_status_invalid_parameter     - a pooling or stride dimension is zero, the window is 1x1,
//                                      channels is zero, a pixel stride is below the channel count,
//                                      a bound is NaN, or output_min >= output_max.
//   xnn_status_unsupported_parameter - the number of pooling elements does not fit into 32 bits.
//   xnn_status_out_of_memory         - an allocation failed.
//
// On failure *average_pooling_op_out is not written. The flags argument is not
// referenced by this function.
enum xnn_status xnn_create_average_pooling2d_nhwc_f32(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* average_pooling_op_out)
{
  xnn_operator_t average_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Average Pooling operator: XNNPACK is not initialized");
    goto error;
  }

  status = xnn_status_invalid_parameter;

  // Test each dimension on its own: a 32-bit product of two non-zero
  // dimensions can wrap around to zero and be misreported here.
  if (pooling_height == 0 || pooling_width == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      pooling_width, pooling_height);
    goto error;
  }

  // Widen before multiplying so that an oversized window is detected instead
  // of silently wrapping around.
  const uint64_t pooling_elements = (uint64_t) pooling_height * (uint64_t) pooling_width;

  if (pooling_elements == 1) {
    xnn_log_error(
      "failed to create Average Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
      "stride dimensions must be non-zero",
      stride_width, stride_height);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Average Pooling operator with %zu channels: number of channels must be non-zero",
      channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Average Pooling operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Average Pooling operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    goto error;
  }

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create Average Pooling operator with NaN output lower bound: lower bound must be non-NaN");
    goto error;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create Average Pooling operator with NaN output upper bound: upper bound must be non-NaN");
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Average Pooling operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      output_min, output_max);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

  // The element count is kept in 32 bits below; reject windows whose product
  // would have wrapped around.
  if (pooling_elements > UINT32_MAX) {
    xnn_log_error(
      "failed to create Average Pooling operator with %" PRIu64 " (%" PRIu32 "x%" PRIu32 ") pooling elements: "
      "the number of elements in the pooling area must fit into 32 bits",
      pooling_elements, pooling_width, pooling_height);
    goto error;
  }
  const uint32_t pooling_size = (uint32_t) pooling_elements;

  status = xnn_status_out_of_memory;

  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (average_pooling_op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  const uint32_t mr = xnn_params.f32.avgpool.mr;
  const uint32_t qr = xnn_params.f32.avgpool.qr;
  // A zero row is needed when there is explicit padding, or when the window
  // size is not exactly mr plus a whole multiple of qr.
  if (any_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
    void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
    if (zero_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
        channels * sizeof(float) + XNN_EXTRA_BYTES);
      goto error;
    }
    average_pooling_op->zero_buffer = zero_buffer;
  }

  average_pooling_op->padding_top = input_padding_top;
  average_pooling_op->padding_right = input_padding_right;
  average_pooling_op->padding_bottom = input_padding_bottom;
  average_pooling_op->padding_left = input_padding_left;

  average_pooling_op->kernel_height = pooling_height;
  average_pooling_op->kernel_width = pooling_width;
  average_pooling_op->stride_height = stride_height;
  average_pooling_op->stride_width = stride_width;
  average_pooling_op->dilation_height = 1;
  average_pooling_op->dilation_width = 1;
  average_pooling_op->channels = channels;
  average_pooling_op->input_pixel_stride = input_pixel_stride;
  average_pooling_op->output_pixel_stride = output_pixel_stride;

  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_f32;
  if (any_padding) {
    // With padding, only the clamping bounds are stored here; the per-pixel
    // scale factors are generated at setup time.
    average_pooling_op->f32_output_params =
      xnn_init_f32_output_params(output_min, output_max);

    average_pooling_op->ukernel.type = xnn_ukernel_type_pixelwise_average_pooling;
  } else {
    // Without padding every window has pooling_size elements, so a single
    // 1/pooling_size multiplier is stored together with the bounds.
    average_pooling_op->f32_avgpool_params =
      xnn_init_f32_avgpool_params(1.0f / (float) pooling_size, output_min, output_max);

    average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
  }

  *average_pooling_op_out = average_pooling_op;
  return xnn_status_success;

error:
  // average_pooling_op is NULL when the failure happened before the descriptor was allocated.
  xnn_delete_operator(average_pooling_op);
  return status;
}
360
// Binds a Q8 average pooling operator to concrete buffers and an input shape.
//
// Computes the output size, (re)builds the indirection buffer unless the one
// from the previous setup can be reused, and fills in the compute context with
// a 2D range of batch_size x output_height.
//
// Returns:
//   xnn_status_success           - the operator is ready to run, or is marked to be skipped
//                                  when batch_size is 0.
//   xnn_status_invalid_parameter - operator type mismatch, a zero input dimension, or a padded
//                                  input smaller than the pooling window.
//   xnn_status_uninitialized     - xnn_params is not initialized.
//   xnn_status_out_of_memory     - the indirection buffer could not be (re)allocated.
//
// The threadpool argument is not referenced by this function.
enum xnn_status xnn_setup_average_pooling2d_nhwc_q8(
    xnn_operator_t average_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_q8) {
    xnn_log_error("failed to setup Average Pooling (Q8) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }
  // The operator stays invalid unless this function reaches one of its successful exits.
  average_pooling_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Average Pooling operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup Average Pooling operator with %zux%zu input: input dimensions must be non-zero",
      input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (batch_size == 0) {
    average_pooling_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // compute_output_dimension() subtracts the window size from the padded input
  // size in size_t arithmetic; a window larger than the padded input would wrap
  // around and yield a bogus output size, so reject it up front.
  const size_t padded_input_height =
    average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom;
  const size_t padded_input_width =
    average_pooling_op->padding_left + input_width + average_pooling_op->padding_right;
  if (padded_input_height < average_pooling_op->kernel_height ||
      padded_input_width < average_pooling_op->kernel_width)
  {
    xnn_log_error(
      "failed to setup Average Pooling operator with %zux%zu padded input: "
      "padded input must be at least as large as the %zux%zu pooling window",
      padded_input_width, padded_input_height,
      (size_t) average_pooling_op->kernel_width, (size_t) average_pooling_op->kernel_height);
    return xnn_status_invalid_parameter;
  }

  average_pooling_op->batch_size = batch_size;
  average_pooling_op->input_height = input_height;
  average_pooling_op->input_width = input_width;
  average_pooling_op->input = input;

  average_pooling_op->output_height = compute_output_dimension(
    padded_input_height,
    average_pooling_op->kernel_height,
    average_pooling_op->stride_height);
  average_pooling_op->output_width = compute_output_dimension(
    padded_input_width,
    average_pooling_op->kernel_width,
    average_pooling_op->stride_width);
  average_pooling_op->output = output;

  // Same input pointer and shape as the last completed setup: if the batch is
  // no larger than what was already prepared, only the batch range and the
  // output pointer need refreshing.
  size_t valid_batch_size = 0;
  if (input == average_pooling_op->last_input &&
      input_height == average_pooling_op->last_input_height &&
      input_width == average_pooling_op->last_input_width)
  {
    valid_batch_size = average_pooling_op->valid_batch_size;
    if (batch_size <= valid_batch_size) {
      average_pooling_op->compute.range[0] = batch_size;
      average_pooling_op->context.average_pooling.output = output;
      average_pooling_op->state = xnn_run_state_ready;
      return xnn_status_success;
    }
  }

  const size_t pooling_height = average_pooling_op->kernel_height;
  const size_t pooling_width = average_pooling_op->kernel_width;
  const size_t pooling_size = pooling_height * pooling_width;
  const size_t output_height = average_pooling_op->output_height;
  const size_t output_width = average_pooling_op->output_width;
  // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
  const uint32_t mr = xnn_params.q8.avgpool.mr;

  const size_t step_width = min(average_pooling_op->stride_width, pooling_width);
  const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;
  const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);

  // On failure the pointer stored in the operator is left untouched.
  const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
  if (indirection_buffer == NULL) {
    xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
    return xnn_status_out_of_memory;
  }
  average_pooling_op->indirection_buffer = indirection_buffer;

  xnn_indirection_init_dwconv2d(
    average_pooling_op, valid_batch_size, step_height, step_width, 0 /* log2(sizeof(uint8_t)) */);

  const uint32_t qr = xnn_params.q8.avgpool.qr;
  const size_t channels = average_pooling_op->channels;

  const size_t indirect_input_height_stride = step_height * sizeof(void*);
  const size_t output_width_stride = average_pooling_op->output_pixel_stride * sizeof(uint8_t);
  const size_t output_height_stride = output_width * output_width_stride;

  // Zero for the single-pass case; otherwise derived from the window size rounded up to the mr/qr pass structure.
  const size_t multipass_adjustment =
    pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
  average_pooling_op->context.average_pooling = (struct average_pooling_context) {
    .indirect_input = indirection_buffer,
    .indirect_input_batch_stride = output_height * indirect_input_height_stride,
    .indirect_input_height_stride = indirect_input_height_stride,
    .output = output,
    .output_batch_stride = output_height * output_height_stride,
    .output_height_stride = output_height_stride,
    .output_width = output_width,
    .pooling_size = pooling_size,
    .channels = channels,
    .zero = average_pooling_op->zero_buffer,
    .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
    .output_increment = output_width_stride - channels * sizeof(uint8_t),
    .params.q8 = average_pooling_op->q8_avgpool_params,
  };
  average_pooling_op->compute.type = xnn_parallelization_type_2d;
  average_pooling_op->compute.range[0] = batch_size;
  average_pooling_op->compute.range[1] = output_height;

  // Windows of at most mr elements use the single-pass kernel; larger ones use the multi-pass kernel.
  if (pooling_size <= mr) {
    average_pooling_op->context.average_pooling.unipass_ukernel = xnn_params.q8.avgpool.up;
    average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_unipass;
  } else {
    average_pooling_op->context.average_pooling.multipass_ukernel = xnn_params.q8.avgpool.mp;
    average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_multipass;
  }
  average_pooling_op->state = xnn_run_state_ready;

  // Remember what this setup covered so that a later call can take the fast path above.
  average_pooling_op->last_input = input;
  average_pooling_op->last_input_height = input_height;
  average_pooling_op->last_input_width = input_width;
  average_pooling_op->valid_batch_size = max(valid_batch_size, batch_size);

  return xnn_status_success;
}
488
// Binds an F32 average pooling operator to concrete buffers and an input shape.
//
// Computes the output size, (re)builds the indirection buffer unless the one
// from the previous setup can be reused, and fills in the compute context that
// matches the operator's micro-kernel type (plain or pixelwise average
// pooling) with a 2D range of batch_size x output_height. For the pixelwise
// type a buffer of per-output-pixel multipliers is regenerated whenever the
// input height or width differs from the last completed setup.
//
// Returns:
//   xnn_status_success           - the operator is ready to run, or is marked to be skipped
//                                  when batch_size is 0.
//   xnn_status_invalid_parameter - operator type mismatch, a zero input dimension, or a padded
//                                  input smaller than the pooling window.
//   xnn_status_uninitialized     - xnn_params is not initialized.
//   xnn_status_out_of_memory     - the indirection or pixelwise buffer could not be (re)allocated.
//
// The threadpool argument is not referenced by this function.
enum xnn_status xnn_setup_average_pooling2d_nhwc_f32(
    xnn_operator_t average_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_f32) {
    xnn_log_error("failed to setup Average Pooling (F32) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }
  // The operator stays invalid unless this function reaches one of its successful exits.
  average_pooling_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Average Pooling operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup Average Pooling operator with %zux%zu input: input dimensions must be non-zero",
      input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  if (batch_size == 0) {
    average_pooling_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  // compute_output_dimension() subtracts the window size from the padded input
  // size in size_t arithmetic; a window larger than the padded input would wrap
  // around and yield a bogus output size, so reject it up front.
  const size_t padded_input_height =
    average_pooling_op->padding_top + input_height + average_pooling_op->padding_bottom;
  const size_t padded_input_width =
    average_pooling_op->padding_left + input_width + average_pooling_op->padding_right;
  if (padded_input_height < average_pooling_op->kernel_height ||
      padded_input_width < average_pooling_op->kernel_width)
  {
    xnn_log_error(
      "failed to setup Average Pooling operator with %zux%zu padded input: "
      "padded input must be at least as large as the %zux%zu pooling window",
      padded_input_width, padded_input_height,
      (size_t) average_pooling_op->kernel_width, (size_t) average_pooling_op->kernel_height);
    return xnn_status_invalid_parameter;
  }

  average_pooling_op->batch_size = batch_size;
  average_pooling_op->input_height = input_height;
  average_pooling_op->input_width = input_width;
  average_pooling_op->input = input;

  average_pooling_op->output_height = compute_output_dimension(
    padded_input_height,
    average_pooling_op->kernel_height,
    average_pooling_op->stride_height);
  average_pooling_op->output_width = compute_output_dimension(
    padded_input_width,
    average_pooling_op->kernel_width,
    average_pooling_op->stride_width);
  average_pooling_op->output = output;

  // Same input pointer and shape as the last completed setup: if the batch is
  // no larger than what was already prepared, only the batch range and the
  // output pointer need refreshing.
  size_t valid_batch_size = 0;
  if (input == average_pooling_op->last_input &&
      input_height == average_pooling_op->last_input_height &&
      input_width == average_pooling_op->last_input_width)
  {
    valid_batch_size = average_pooling_op->valid_batch_size;
    if (batch_size <= valid_batch_size) {
      average_pooling_op->compute.range[0] = batch_size;
      // Refresh the output pointer in the context member that was populated for
      // this micro-kernel type: the pixelwise context is a separate struct from
      // the plain average pooling context.
      if (average_pooling_op->ukernel.type == xnn_ukernel_type_pixelwise_average_pooling) {
        average_pooling_op->context.pixelwise_average_pooling.output = output;
      } else {
        average_pooling_op->context.average_pooling.output = output;
      }
      average_pooling_op->state = xnn_run_state_ready;
      return xnn_status_success;
    }
  }

  const size_t pooling_height = average_pooling_op->kernel_height;
  const size_t pooling_width = average_pooling_op->kernel_width;
  const size_t pooling_size = pooling_height * pooling_width;
  const size_t output_height = average_pooling_op->output_height;
  const size_t output_width = average_pooling_op->output_width;
  // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
  const uint32_t mr = xnn_params.f32.avgpool.mr;
  // The same mr is used for both micro-kernel types below.
  assert(mr == xnn_params.f32.pavgpool.mr);

  const size_t step_width = min(average_pooling_op->stride_width, pooling_width);
  const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;
  const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);

  // On failure the pointer stored in the operator is left untouched.
  const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
  if (indirection_buffer == NULL) {
    xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
    return xnn_status_out_of_memory;
  }
  average_pooling_op->indirection_buffer = indirection_buffer;

  xnn_indirection_init_dwconv2d(
    average_pooling_op, valid_batch_size, step_height, step_width, 2 /* log2(sizeof(float)) */);

  const size_t channels = average_pooling_op->channels;

  const size_t indirect_input_height_stride = step_height * sizeof(void*);
  const size_t output_width_stride = average_pooling_op->output_pixel_stride * sizeof(float);
  const size_t output_height_stride = output_width * output_width_stride;

  switch (average_pooling_op->ukernel.type) {
    case xnn_ukernel_type_average_pooling:
    {
      const uint32_t qr = xnn_params.f32.avgpool.qr;
      // Zero for the single-pass case; otherwise derived from the window size rounded up to the mr/qr pass structure.
      const size_t multipass_adjustment =
        pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
      average_pooling_op->context.average_pooling = (struct average_pooling_context) {
        .indirect_input = indirection_buffer,
        .indirect_input_batch_stride = output_height * indirect_input_height_stride,
        .indirect_input_height_stride = indirect_input_height_stride,
        .output = output,
        .output_batch_stride = output_height * output_height_stride,
        .output_height_stride = output_height_stride,
        .output_width = output_width,
        .pooling_size = pooling_size,
        .channels = channels,
        .zero = average_pooling_op->zero_buffer,
        .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
        .output_increment = output_width_stride - channels * sizeof(float),
        .params.f32 = average_pooling_op->f32_avgpool_params,
      };
      // Windows of at most mr elements use the single-pass kernel; larger ones use the multi-pass kernel.
      if (pooling_size <= mr) {
        average_pooling_op->context.average_pooling.unipass_ukernel = xnn_params.f32.avgpool.up;
        average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_unipass;
      } else {
        average_pooling_op->context.average_pooling.multipass_ukernel = xnn_params.f32.avgpool.mp;
        average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_average_pooling_multipass;
      }
      break;
    }
    case xnn_ukernel_type_pixelwise_average_pooling:
    {
      // The multipliers depend only on the input shape and the operator
      // geometry, so they are regenerated only when the shape changes.
      if (input_height != average_pooling_op->last_input_height ||
          input_width != average_pooling_op->last_input_width)
      {
        const size_t pixelwise_buffer_size = output_height * output_width * sizeof(float);
        float* pixelwise_buffer = (float*) xnn_reallocate_memory(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
        if (pixelwise_buffer == NULL) {
          xnn_log_error("failed to allocate %zu bytes for pixelwise buffer", pixelwise_buffer_size);
          return xnn_status_out_of_memory;
        }
        average_pooling_op->pixelwise_buffer = pixelwise_buffer;

        // One multiplier per output pixel: the reciprocal of the number of
        // input rows times input columns covered by that pixel's window after
        // clipping it to the unpadded input.
        float* pixelwise_pointer = pixelwise_buffer;
        for (size_t output_y = 0; output_y < output_height; output_y++) {
          const size_t input_y_start = doz(output_y * average_pooling_op->stride_height, average_pooling_op->padding_top);
          const size_t input_y_end =
            min(doz(output_y * average_pooling_op->stride_height + average_pooling_op->kernel_height, average_pooling_op->padding_top), input_height);
          const uint32_t input_y_range = (uint32_t) (input_y_end - input_y_start);
          for (size_t output_x = 0; output_x < output_width; output_x++) {
            const size_t input_x_start = doz(output_x * average_pooling_op->stride_width, average_pooling_op->padding_left);
            const size_t input_x_end =
              min(doz(output_x * average_pooling_op->stride_width + average_pooling_op->kernel_width, average_pooling_op->padding_left), input_width);
            const uint32_t input_x_range = (uint32_t) (input_x_end - input_x_start);
            *pixelwise_pointer++ = 1.0f / ((float) (int32_t) (input_y_range * input_x_range));
          }
        }
      }

      const uint32_t qr = xnn_params.f32.pavgpool.qr;
      // Zero for the single-pass case; otherwise derived from the window size rounded up to the mr/qr pass structure.
      const size_t multipass_adjustment =
        pooling_size > mr ? round_up(pooling_size - mr, qr) + mr - qr : 0;
      average_pooling_op->context.pixelwise_average_pooling = (struct pixelwise_average_pooling_context) {
        .indirect_input = indirection_buffer,
        .indirect_input_batch_stride = output_height * indirect_input_height_stride,
        .indirect_input_height_stride = indirect_input_height_stride,
        .pixelwise_buffer = average_pooling_op->pixelwise_buffer,
        .pixelwise_buffer_height_stride = output_width * sizeof(float),
        .output = output,
        .output_batch_stride = output_height * output_height_stride,
        .output_height_stride = output_height_stride,
        .output_width = output_width,
        .pooling_size = pooling_size,
        .channels = channels,
        .zero = average_pooling_op->zero_buffer,
        .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
        .output_increment = output_width_stride - channels * sizeof(float),
        .params.f32 = average_pooling_op->f32_output_params,
      };
      // Windows of at most mr elements use the single-pass kernel; larger ones use the multi-pass kernel.
      if (pooling_size <= mr) {
        average_pooling_op->context.pixelwise_average_pooling.unipass_ukernel = xnn_params.f32.pavgpool.up;
        average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_unipass;
      } else {
        average_pooling_op->context.pixelwise_average_pooling.multipass_ukernel = xnn_params.f32.pavgpool.mp;
        average_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_pixelwise_average_pooling_multipass;
      }
      break;
    }
    default:
      XNN_UNREACHABLE;
  }
  average_pooling_op->compute.type = xnn_parallelization_type_2d;
  average_pooling_op->compute.range[0] = batch_size;
  average_pooling_op->compute.range[1] = output_height;
  average_pooling_op->state = xnn_run_state_ready;

  // Remember what this setup covered so that a later call can take the fast path above.
  average_pooling_op->last_input = input;
  average_pooling_op->last_input_height = input_height;
  average_pooling_op->last_input_width = input_width;
  average_pooling_op->valid_batch_size = max(valid_batch_size, batch_size);

  return xnn_status_success;
}
682