1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include <fp16.h>
18
19 #include <xnnpack.h>
20 #include <xnnpack/allocator.h>
21 #include <xnnpack/common.h>
22 #include <xnnpack/indirection.h>
23 #include <xnnpack/log.h>
24 #include <xnnpack/math.h>
25 #include <xnnpack/operator.h>
26 #include <xnnpack/params-init.h>
27 #include <xnnpack/params.h>
28
29
// Computes the number of output positions along one spatial axis when padding is explicit.
//
//   padded_input_dimension - input extent along the axis with both paddings already added.
//   kernel_dimension       - pooling window extent along the axis.
//   dilation_dimension     - spacing between consecutive taps of the window.
//   stride_dimension       - distance between consecutive window origins.
//
// All arithmetic is unsigned and unchecked: a padded input smaller than the dilated window
// wraps around, and a zero stride divides by zero.
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t kernel_dimension,
    size_t dilation_dimension,
    size_t stride_dimension)
{
  // Extent covered by the window once the dilation gaps are included.
  const size_t dilated_window = 1 + dilation_dimension * (kernel_dimension - 1);
  // Whole strides the window can advance while staying inside the padded input.
  const size_t full_steps = (padded_input_dimension - dilated_window) / stride_dimension;
  return full_steps + 1;
}
39
// Computes the output extent along one spatial axis when XNN_FLAG_TENSORFLOW_SAME_PADDING
// is in effect: the input extent divided by the stride, with rounding delegated to
// divide_round_up().
static inline size_t compute_output_dimension_with_tf_same_padding(
    size_t input_dimension,
    size_t stride_dimension)
{
  const size_t output_dimension = divide_round_up(input_dimension, stride_dimension);
  return output_dimension;
}
46
// Shared implementation behind the data-type specific xnn_create_max_pooling2d_nhwc_* entry
// points: validates the configuration, allocates a zero-initialized operator descriptor and
// stores the configuration in it.
//
// Parameters:
//   input_padding_*       - explicit padding on each side; must all be zero when
//                           XNN_FLAG_TENSORFLOW_SAME_PADDING is set in flags.
//   pooling_height/width  - pooling window size; the product must be neither 0 nor 1.
//   stride_height/width   - window stride; both must be non-zero.
//   dilation_height/width - window dilation; both must be non-zero.
//   channels              - number of channels; must be non-zero.
//   input_pixel_stride,
//   output_pixel_stride   - distance between pixels, in elements; each must be >= channels.
//   flags                 - operator flags, stored verbatim in the operator.
//   params, params_size   - pre-initialized micro-kernel parameters, copied into the operator.
//   datatype_init_flags   - bits that must all be set in xnn_params.init_flags for the data type
//                           to be usable.
//   operator_type         - type tag stored in the operator and used in log messages.
//   max_pooling_op_out    - receives the new operator on success; untouched on failure.
//
// Returns xnn_status_success, or xnn_status_uninitialized, xnn_status_unsupported_hardware,
// xnn_status_invalid_parameter, xnn_status_out_of_memory depending on which stage failed.
static enum xnn_status create_max_pooling2d_nhwc(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    const void* params,
    size_t params_size,
    uint32_t datatype_init_flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* max_pooling_op_out)
{
  xnn_operator_t max_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    // This is the create path, so the message says "create" like every other message below.
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_uninitialized;
  }

  // `status` is staged ahead of each group of checks so that `goto error` reports the
  // status belonging to the group that failed.
  status = xnn_status_unsupported_hardware;

  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error(
      "failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type),
      pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type), stride_width, stride_height);
    goto error;
  }

  if (dilation_height == 0 || dilation_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " dilation: dilation dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type), dilation_width, dilation_height);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_pixel_stride, channels);
    goto error;
  }

  // SAME padding is derived from the input size at setup time, so it cannot coexist with
  // explicitly specified padding.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(operator_type),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_out_of_memory;

  max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (max_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  max_pooling_op->padding_top = input_padding_top;
  max_pooling_op->padding_right = input_padding_right;
  max_pooling_op->padding_bottom = input_padding_bottom;
  max_pooling_op->padding_left = input_padding_left;

  max_pooling_op->kernel_height = pooling_height;
  max_pooling_op->kernel_width = pooling_width;
  max_pooling_op->stride_height = stride_height;
  max_pooling_op->stride_width = stride_width;
  max_pooling_op->dilation_height = dilation_height;
  max_pooling_op->dilation_width = dilation_width;
  max_pooling_op->channels = channels;
  max_pooling_op->input_pixel_stride = input_pixel_stride;
  max_pooling_op->output_pixel_stride = output_pixel_stride;

  memcpy(&max_pooling_op->params, params, params_size);
  max_pooling_op->type = operator_type;
  max_pooling_op->flags = flags;

  // The operator only becomes runnable after a successful setup call.
  max_pooling_op->state = xnn_run_state_invalid;

  *max_pooling_op_out = max_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(max_pooling_op);
  return status;
}
192
// Shared setup routine behind the data-type specific xnn_setup_max_pooling2d_nhwc_* entry points.
//
// Records the input shape and pointer in the operator, derives the output shape (and, under
// XNN_FLAG_TENSORFLOW_SAME_PADDING, the padding), rebuilds the indirection buffer when the input
// shape differs from the one used by the previous setup, and fills in the compute context that
// is handed to xnn_compute_max_pooling.
//
// Parameters:
//   max_pooling_op            - operator to configure; marked invalid until setup succeeds.
//   batch_size                - number of images; zero marks the operator as skippable.
//   input_height, input_width - spatial input size; both must be non-zero.
//   input, output             - input and output data pointers.
//   log2_input_element_size,
//   log2_output_element_size  - log2 of the element size in bytes, used to scale strides.
//   maxpool                   - micro-kernel descriptor supplying mr, qr and the ukernel pointer.
//   params, params_size       - micro-kernel parameters copied into the compute context.
//   num_threads               - not used by this routine.
//
// Returns xnn_status_success when configured (or skipped because the batch is empty),
// xnn_status_uninitialized, xnn_status_invalid_parameter or xnn_status_out_of_memory.
static enum xnn_status setup_max_pooling2d_nhwc(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    uint32_t log2_input_element_size,
    uint32_t log2_output_element_size,
    struct maxpool_parameters maxpool[restrict XNN_MIN_ELEMENTS(1)],
    const void* params,
    size_t params_size,
    size_t num_threads)
{
  // Any early exit below must leave the operator in a non-runnable state.
  max_pooling_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error(
      "failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_uninitialized;
  }

  if (input_height == 0 || input_width == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(max_pooling_op->type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // Nothing to compute for an empty batch.
  if (batch_size == 0) {
    max_pooling_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  max_pooling_op->input = input;
  max_pooling_op->input_width = input_width;
  max_pooling_op->input_height = input_height;

  const bool use_tf_same_padding = (max_pooling_op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0;
  if (!use_tf_same_padding) {
    // Explicit padding: the output size follows from the padded input size.
    max_pooling_op->output_height = compute_output_dimension(
      max_pooling_op->padding_top + input_height + max_pooling_op->padding_bottom,
      max_pooling_op->kernel_height,
      max_pooling_op->dilation_height,
      max_pooling_op->stride_height);
    max_pooling_op->output_width = compute_output_dimension(
      max_pooling_op->padding_left + input_width + max_pooling_op->padding_right,
      max_pooling_op->kernel_width,
      max_pooling_op->dilation_width,
      max_pooling_op->stride_width);
  } else {
    // SAME padding: the output size is fixed first, then the padding needed to reach it is
    // split between the two sides, with the odd element going to the bottom/right.
    max_pooling_op->output_height = compute_output_dimension_with_tf_same_padding(
      input_height, max_pooling_op->stride_height);
    max_pooling_op->output_width = compute_output_dimension_with_tf_same_padding(
      input_width, max_pooling_op->stride_width);

    const uint32_t dilated_kernel_height = (max_pooling_op->kernel_height - 1) * max_pooling_op->dilation_height + 1;
    const uint32_t dilated_kernel_width = (max_pooling_op->kernel_width - 1) * max_pooling_op->dilation_width + 1;
    const uint32_t vertical_padding =
      doz((max_pooling_op->output_height - 1) * max_pooling_op->stride_height + dilated_kernel_height, input_height);
    const uint32_t horizontal_padding =
      doz((max_pooling_op->output_width - 1) * max_pooling_op->stride_width + dilated_kernel_width, input_width);

    max_pooling_op->padding_top = vertical_padding / 2;
    max_pooling_op->padding_bottom = vertical_padding - max_pooling_op->padding_top;
    max_pooling_op->padding_left = horizontal_padding / 2;
    max_pooling_op->padding_right = horizontal_padding - max_pooling_op->padding_left;
  }

  const size_t kernel_rows = max_pooling_op->kernel_height;
  const size_t kernel_cols = max_pooling_op->kernel_width;
  const size_t kernel_elements = kernel_rows * kernel_cols;
  const size_t out_rows = max_pooling_op->output_height;
  const size_t out_cols = max_pooling_op->output_width;
  const uint32_t mr = maxpool->mr;

  // Number of kernel columns by which consecutive output pixels advance in the indirection
  // buffer; with dilation the full kernel width is always used.
  size_t step_width = kernel_cols;
  if (max_pooling_op->dilation_width <= 1) {
    step_width = min(max_pooling_op->stride_width, kernel_cols);
  }
  // Number of indirection entries that make up one output row.
  const size_t step_height = kernel_elements + (out_cols - 1) * step_width * kernel_rows;

  const bool input_shape_changed =
    input_height != max_pooling_op->last_input_height ||
    input_width != max_pooling_op->last_input_width;
  if (input_shape_changed) {
    // The micro-kernel may read up to (mr - 1) entries past the end of the indirection buffer,
    // so that many extra entries are allocated.
    const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + out_rows * step_height);
    const void** new_indirection_buffer =
      (const void**) xnn_reallocate_memory(max_pooling_op->indirection_buffer, indirection_buffer_size);
    if (new_indirection_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
      return xnn_status_out_of_memory;
    }
    max_pooling_op->indirection_buffer = new_indirection_buffer;

    xnn_indirection_init_maxpool2d(max_pooling_op, step_height, step_width, log2_input_element_size);

    // Remember what the buffer was built for: a later setup with the same shape skips the
    // rebuild and only differs through input_offset below.
    max_pooling_op->last_input = input;
    max_pooling_op->last_input_height = input_height;
    max_pooling_op->last_input_width = input_width;
  }

  const uint32_t qr = maxpool->qr;
  const size_t channels = max_pooling_op->channels;

  const size_t indirect_row_stride = step_height * sizeof(void*);
  const size_t output_pixel_bytes = max_pooling_op->output_pixel_stride << log2_output_element_size;
  const size_t output_row_bytes = out_cols * output_pixel_bytes;
  const size_t multipass_adjustment = round_up(doz(kernel_elements, mr), qr) + mr;

  max_pooling_op->context.max_pooling = (struct max_pooling_context) {
    .indirect_input = max_pooling_op->indirection_buffer,
    .indirect_input_height_stride = indirect_row_stride,
    // Byte distance between the current input and the input the indirection buffer was built for.
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) max_pooling_op->last_input),
    .input_batch_stride = (input_height * input_width * max_pooling_op->input_pixel_stride) << log2_input_element_size,
    .output = output,
    .output_batch_stride = out_rows * output_row_bytes,
    .output_height_stride = output_row_bytes,
    .output_width = out_cols,
    .pooling_size = kernel_elements,
    .channels = channels,
    .input_increment = (kernel_rows * step_width - multipass_adjustment) * sizeof(void*),
    .output_increment = output_pixel_bytes - (channels << log2_output_element_size),
    .ukernel = maxpool->ukernel,
  };
  memcpy(&max_pooling_op->context.max_pooling.params, params, params_size);

  // Work is parallelized over (batch index, output row).
  max_pooling_op->compute.type = xnn_parallelization_type_2d;
  max_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_max_pooling;
  max_pooling_op->compute.range[0] = batch_size;
  max_pooling_op->compute.range[1] = out_rows;
  max_pooling_op->state = xnn_run_state_ready;

  return xnn_status_success;
}
325
// Creates a 2D Max Pooling operator for signed 8-bit NHWC tensors.
//
// output_min/output_max bound the output values and must satisfy output_min < output_max;
// otherwise xnn_status_invalid_parameter is returned. All remaining arguments are validated
// and stored by create_max_pooling2d_nhwc(), whose status is returned. On success the new
// operator is written to *max_pooling_op_out.
enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    int8_t output_min,
    int8_t output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_s8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_s8_minmax_params params;
  // Guard the init hook the same way the f16 variant does, so an unset hook is not called.
  // NOTE(review): presumably the hook is only unset when the s8 data type is unavailable, in
  // which case create_max_pooling2d_nhwc() fails before reading `params` - confirm.
  if (xnn_params.s8.maxpool.init.s8 != NULL) {
    xnn_params.s8.maxpool.init.s8(&params, output_min, output_max);
  }
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_S8,
    xnn_operator_type_max_pooling_nhwc_s8,
    max_pooling_op_out);
}
365
// Creates a 2D Max Pooling operator for unsigned 8-bit NHWC tensors.
//
// output_min/output_max bound the output values and must satisfy output_min < output_max;
// otherwise xnn_status_invalid_parameter is returned. All remaining arguments are validated
// and stored by create_max_pooling2d_nhwc(), whose status is returned. On success the new
// operator is written to *max_pooling_op_out.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_u8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_u8_minmax_params params;
  // Guard the init hook the same way the f16 variant does, so an unset hook is not called.
  // NOTE(review): presumably the hook is only unset when the u8 data type is unavailable, in
  // which case create_max_pooling2d_nhwc() fails before reading `params` - confirm.
  if (xnn_params.u8.maxpool.init.u8 != NULL) {
    xnn_params.u8.maxpool.init.u8(&params, output_min, output_max);
  }
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_U8,
    xnn_operator_type_max_pooling_nhwc_u8,
    max_pooling_op_out);
}
405
// Creates a 2D Max Pooling operator for single-precision floating-point NHWC tensors.
//
// output_min/output_max bound the output values; neither may be NaN and output_min must be
// strictly below output_max, otherwise xnn_status_invalid_parameter is returned. All remaining
// arguments are validated and stored by create_max_pooling2d_nhwc(), whose status is returned.
// On success the new operator is written to *max_pooling_op_out.
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f32));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f32));
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f32), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f32_minmax_params params;
  // Guard the init hook the same way the f16 variant does, so an unset hook is not called.
  // NOTE(review): presumably the hook is only unset when the f32 data type is unavailable, in
  // which case create_max_pooling2d_nhwc() fails before reading `params` - confirm.
  if (xnn_params.f32.maxpool.init.f32 != NULL) {
    xnn_params.f32.maxpool.init.f32(&params, output_min, output_max);
  }
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_max_pooling_nhwc_f32,
    max_pooling_op_out);
}
459
// Creates a 2D Max Pooling operator for half-precision floating-point NHWC tensors.
//
// output_min/output_max are given as single-precision values; neither may be NaN. They are
// rounded to half precision first, and the range check (output_min < output_max) is done on
// the rounded values, so bounds that collapse to the same half-precision value are rejected
// with xnn_status_invalid_parameter. All remaining arguments are validated and stored by
// create_max_pooling2d_nhwc(), whose status is returned. On success the new operator is
// written to *max_pooling_op_out.
enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f16));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f16));
    return xnn_status_invalid_parameter;
  }

  // Round-trip the bounds through half precision so that validation and logging see the
  // values the operator will actually use.
  const uint16_t output_min_as_half = fp16_ieee_from_fp32_value(output_min);
  const uint16_t output_max_as_half = fp16_ieee_from_fp32_value(output_max);
  output_min = fp16_ieee_to_fp32_value(output_min_as_half);
  output_max = fp16_ieee_to_fp32_value(output_max_as_half);
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_max_pooling_nhwc_f16), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_minmax_params params;
  // The init hook may be unset; in that case `params` stays uninitialized.
  // NOTE(review): presumably create_max_pooling2d_nhwc() then fails its data-type check before
  // copying `params` - confirm.
  if (xnn_params.f16.maxpool.init.f16 != NULL) {
    xnn_params.f16.maxpool.init.f16(&params, output_min_as_half, output_max_as_half);
  }
  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_max_pooling_nhwc_f16,
    max_pooling_op_out);
}
519
// Sets up a signed 8-bit Max Pooling operator for the given batch size, input size and buffers.
//
// Returns xnn_status_invalid_parameter if the operator was not created as an s8 max pooling
// operator; otherwise returns the status of setup_max_pooling2d_nhwc().
enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  const enum xnn_operator_type expected_type = xnn_operator_type_max_pooling_nhwc_s8;
  if (max_pooling_op->type != expected_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  // log2(sizeof(int8_t)), shared by input and output elements.
  const uint32_t log2_element_size = 0;
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_max_pooling2d_nhwc(
    max_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size,
    log2_element_size,
    &xnn_params.s8.maxpool,
    &max_pooling_op->params.s8_minmax, sizeof(max_pooling_op->params.s8_minmax),
    num_threads);
}
546
// Sets up an unsigned 8-bit Max Pooling operator for the given batch size, input size and buffers.
//
// Returns xnn_status_invalid_parameter if the operator was not created as a u8 max pooling
// operator; otherwise returns the status of setup_max_pooling2d_nhwc().
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  const enum xnn_operator_type expected_type = xnn_operator_type_max_pooling_nhwc_u8;
  if (max_pooling_op->type != expected_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  // log2(sizeof(uint8_t)), shared by input and output elements.
  const uint32_t log2_element_size = 0;
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_max_pooling2d_nhwc(
    max_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size,
    log2_element_size,
    &xnn_params.u8.maxpool,
    &max_pooling_op->params.u8_minmax, sizeof(max_pooling_op->params.u8_minmax),
    num_threads);
}
573
// Sets up a half-precision Max Pooling operator for the given batch size, input size and buffers.
// Input and output are passed as untyped pointers; elements are treated as 2 bytes wide.
//
// Returns xnn_status_invalid_parameter if the operator was not created as an f16 max pooling
// operator; otherwise returns the status of setup_max_pooling2d_nhwc().
enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const enum xnn_operator_type expected_type = xnn_operator_type_max_pooling_nhwc_f16;
  if (max_pooling_op->type != expected_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  // log2(sizeof(uint16_t)), shared by input and output elements.
  const uint32_t log2_element_size = 1;
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_max_pooling2d_nhwc(
    max_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size,
    log2_element_size,
    &xnn_params.f16.maxpool,
    &max_pooling_op->params.f16_minmax, sizeof(max_pooling_op->params.f16_minmax),
    num_threads);
}
600
// Sets up a single-precision Max Pooling operator for the given batch size, input size and buffers.
//
// Returns xnn_status_invalid_parameter if the operator was not created as an f32 max pooling
// operator; otherwise returns the status of setup_max_pooling2d_nhwc().
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const enum xnn_operator_type expected_type = xnn_operator_type_max_pooling_nhwc_f32;
  if (max_pooling_op->type != expected_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  // log2(sizeof(float)), shared by input and output elements.
  const uint32_t log2_element_size = 2;
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);

  return setup_max_pooling2d_nhwc(
    max_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    log2_element_size,
    log2_element_size,
    &xnn_params.f32.maxpool,
    &max_pooling_op->params.f32_minmax, sizeof(max_pooling_op->params.f32_minmax),
    num_threads);
}
627
628