• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include <fp16.h>
18 
19 #include <xnnpack.h>
20 #include <xnnpack/allocator.h>
21 #include <xnnpack/common.h>
22 #include <xnnpack/indirection.h>
23 #include <xnnpack/log.h>
24 #include <xnnpack/math.h>
25 #include <xnnpack/operator.h>
26 #include <xnnpack/params-init.h>
27 #include <xnnpack/params.h>
28 
29 
// Computes the number of output elements along one spatial dimension when a
// (possibly dilated) pooling window slides over a padded input with a stride.
//
//   padded_input_dimension - input extent including padding on both sides.
//   kernel_dimension       - pooling window extent along this dimension.
//   dilation_dimension     - spacing between consecutive window taps.
//   stride_dimension       - step between consecutive windows; must be non-zero
//                            because it is used as a divisor.
//
// The subtraction saturates at zero: if the effective (dilated) kernel is
// larger than the padded input, the result is 1 rather than a value derived
// from unsigned wrap-around.
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t kernel_dimension,
    size_t dilation_dimension,
    size_t stride_dimension)
{
  const size_t effective_kernel_dimension = (kernel_dimension - 1) * dilation_dimension + 1;
  const size_t slack = padded_input_dimension > effective_kernel_dimension
    ? padded_input_dimension - effective_kernel_dimension
    : 0;
  return slack / stride_dimension + 1;
}
39 
// Output extent along one spatial dimension when the operator uses
// XNN_FLAG_TENSORFLOW_SAME_PADDING: the input extent divided by the stride,
// as computed by divide_round_up().
static inline size_t compute_output_dimension_with_tf_same_padding(
    size_t input_dimension,
    size_t stride_dimension)
{
  const size_t output_dimension = divide_round_up(input_dimension, stride_dimension);
  return output_dimension;
}
46 
// Shared implementation behind the xnn_create_max_pooling2d_nhwc_* functions.
//
// Validates the pooling configuration, allocates a zero-initialized operator
// descriptor, records the configuration in it and copies `params_size` bytes
// of clamping parameters from `params`.
//
// Returns:
//   xnn_status_success              - the new operator is stored in
//                                     *max_pooling_op_out.
//   xnn_status_uninitialized        - XNN_INIT_FLAG_XNNPACK is not set.
//   xnn_status_unsupported_hardware - not all bits of datatype_init_flags are
//                                     set in xnn_params.init_flags.
//   xnn_status_invalid_parameter    - a zero pooling/stride/dilation dimension,
//                                     a 1x1 pooling window, zero channels, a
//                                     pixel stride below the channel count, or
//                                     explicit padding combined with
//                                     XNN_FLAG_TENSORFLOW_SAME_PADDING.
//   xnn_status_out_of_memory        - the descriptor could not be allocated.
//
// On any failure *max_pooling_op_out is left unmodified.
static enum xnn_status create_max_pooling2d_nhwc(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint32_t flags,
    const void* params,
    size_t params_size,
    uint32_t datatype_init_flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* max_pooling_op_out)
{
  xnn_operator_t max_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_uninitialized;
  }

  status = xnn_status_unsupported_hardware;

  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error(
      "failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  // The dimensions are checked individually rather than through their 32-bit
  // product, which could wrap around to 0 or 1 for very large windows.
  if (pooling_height == 0 || pooling_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type),
      pooling_width, pooling_height);
    goto error;
  }

  if (pooling_height == 1 && pooling_width == 1) {
    xnn_log_error(
      "failed to create %s operator with 1 pooling element: 1x1 pooling is meaningless",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " stride: stride dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type), stride_width, stride_height);
    goto error;
  }

  if (dilation_height == 0 || dilation_width == 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu32 "x%" PRIu32 " dilation: dilation dimensions must be non-zero",
      xnn_operator_type_to_string(operator_type), dilation_width, dilation_height);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_pixel_stride, channels);
    goto error;
  }

  // With TensorFlow SAME padding the padding is derived at setup time, so an
  // explicit padding specification is rejected.
  const bool any_padding = (input_padding_left | input_padding_top | input_padding_right | input_padding_bottom) != 0;
  if ((flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0) {
    if (any_padding) {
      xnn_log_error(
        "failed to create %s operator with %" PRIu32 "+%" PRIu32 "x%" PRIu32 "+%" PRIu32" padding: "
        "TensorFlow SAME padding can't be combined with explicit padding specification",
        xnn_operator_type_to_string(operator_type),
        input_padding_top, input_padding_left, input_padding_bottom, input_padding_right);
      goto error;
    }
  }

  status = xnn_status_out_of_memory;

  max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (max_pooling_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  max_pooling_op->padding_top = input_padding_top;
  max_pooling_op->padding_right = input_padding_right;
  max_pooling_op->padding_bottom = input_padding_bottom;
  max_pooling_op->padding_left = input_padding_left;

  max_pooling_op->kernel_height = pooling_height;
  max_pooling_op->kernel_width = pooling_width;
  max_pooling_op->stride_height = stride_height;
  max_pooling_op->stride_width = stride_width;
  max_pooling_op->dilation_height = dilation_height;
  max_pooling_op->dilation_width = dilation_width;
  max_pooling_op->channels = channels;
  max_pooling_op->input_pixel_stride = input_pixel_stride;
  max_pooling_op->output_pixel_stride = output_pixel_stride;

  memcpy(&max_pooling_op->params, params, params_size);
  max_pooling_op->type = operator_type;
  max_pooling_op->flags = flags;

  // The operator cannot run until a setup call succeeds.
  max_pooling_op->state = xnn_run_state_invalid;

  *max_pooling_op_out = max_pooling_op;
  return xnn_status_success;

error:
  // max_pooling_op is still NULL on every path that reaches this label.
  xnn_delete_operator(max_pooling_op);
  return status;
}
192 
// Shared implementation behind the xnn_setup_max_pooling2d_nhwc_* functions.
//
// Binds an operator to a concrete batch size, input shape and input/output
// buffers: derives the output shape (and, for TensorFlow SAME padding, the
// padding), rebuilds the indirection buffer when the input shape differs from
// the previous setup, and fills in the compute context.
//
// Returns xnn_status_success on success (including the batch_size == 0 case,
// where the operator is marked to be skipped), xnn_status_uninitialized if
// XNNPACK is not initialized, xnn_status_invalid_parameter for a zero input
// dimension, and xnn_status_out_of_memory if the indirection buffer cannot be
// (re)allocated.
static enum xnn_status setup_max_pooling2d_nhwc(
  xnn_operator_t max_pooling_op,
  size_t batch_size,
  size_t input_height,
  size_t input_width,
  const void* input,
  void* output,
  uint32_t log2_input_element_size,
  uint32_t log2_output_element_size,
  struct maxpool_parameters maxpool[restrict XNN_MIN_ELEMENTS(1)],
  const void* params,
  size_t params_size,
  size_t num_threads)
{
  const xnn_operator_t op = max_pooling_op;

  // The operator stays non-runnable unless this function reaches the end.
  op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error(
      "failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(op->type));
    return xnn_status_uninitialized;
  }

  if (input_width == 0 || input_height == 0) {
    xnn_log_error(
      "failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
      xnn_operator_type_to_string(op->type), input_width, input_height);
    return xnn_status_invalid_parameter;
  }

  // Nothing to compute for an empty batch.
  if (batch_size == 0) {
    op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  op->input_height = input_height;
  op->input_width = input_width;
  op->input = input;

  if (op->flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) {
    op->output_height = compute_output_dimension_with_tf_same_padding(
        input_height, op->stride_height);
    op->output_width = compute_output_dimension_with_tf_same_padding(
        input_width, op->stride_width);

    // Derive the padding needed to produce that output size; the top/left
    // sides get the smaller half when the total is odd.
    const uint32_t dilated_kernel_height = (op->kernel_height - 1) * op->dilation_height + 1;
    const uint32_t dilated_kernel_width = (op->kernel_width - 1) * op->dilation_width + 1;
    const uint32_t vertical_padding =
      doz((op->output_height - 1) * op->stride_height + dilated_kernel_height, input_height);
    const uint32_t horizontal_padding =
      doz((op->output_width - 1) * op->stride_width + dilated_kernel_width, input_width);
    op->padding_top = vertical_padding / 2;
    op->padding_left = horizontal_padding / 2;
    op->padding_bottom = vertical_padding - op->padding_top;
    op->padding_right = horizontal_padding - op->padding_left;
  } else {
    op->output_height = compute_output_dimension(
        op->padding_top + input_height + op->padding_bottom,
        op->kernel_height,
        op->dilation_height,
        op->stride_height);
    op->output_width = compute_output_dimension(
        op->padding_left + input_width + op->padding_right,
        op->kernel_width,
        op->dilation_width,
        op->stride_width);
  }

  const size_t kernel_rows = op->kernel_height;
  const size_t kernel_cols = op->kernel_width;
  const size_t kernel_elements = kernel_rows * kernel_cols;
  const size_t out_rows = op->output_height;
  const size_t out_cols = op->output_width;
  const uint32_t mr = maxpool->mr;

  // Number of indirection entries to advance per output pixel and per output row.
  size_t step_width;
  if (op->dilation_width > 1) {
    step_width = kernel_cols;
  } else {
    step_width = min(op->stride_width, kernel_cols);
  }
  const size_t step_height = kernel_elements + (out_cols - 1) * step_width * kernel_rows;

  const bool same_shape_as_last_setup =
    input_height == op->last_input_height && input_width == op->last_input_width;
  if (!same_shape_as_last_setup) {
    // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
    const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + out_rows * step_height);
    const void** new_indirection_buffer =
      (const void**) xnn_reallocate_memory(op->indirection_buffer, indirection_buffer_size);
    if (new_indirection_buffer == NULL) {
      xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
      return xnn_status_out_of_memory;
    }
    op->indirection_buffer = new_indirection_buffer;

    xnn_indirection_init_maxpool2d(op, step_height, step_width, log2_input_element_size);

    // Remember what the indirection buffer was built for; later setups with
    // the same shape reuse it and only adjust input_offset below.
    op->last_input = input;
    op->last_input_height = input_height;
    op->last_input_width = input_width;
  }

  const uint32_t qr = maxpool->qr;
  const size_t channels = op->channels;

  const size_t indirect_input_height_stride = step_height * sizeof(void*);
  const size_t output_width_stride = op->output_pixel_stride << log2_output_element_size;
  const size_t output_height_stride = out_cols * output_width_stride;
  const size_t multipass_adjustment = round_up(doz(kernel_elements, mr), qr) + mr;

  op->context.max_pooling = (struct max_pooling_context) {
    .indirect_input = op->indirection_buffer,
    .indirect_input_height_stride = indirect_input_height_stride,
    .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) op->last_input),
    .input_batch_stride = (input_height * input_width * op->input_pixel_stride) << log2_input_element_size,
    .output = output,
    .output_batch_stride = out_rows * output_height_stride,
    .output_height_stride = output_height_stride,
    .output_width = out_cols,
    .pooling_size = kernel_elements,
    .channels = channels,
    .input_increment = (kernel_rows * step_width - multipass_adjustment) * sizeof(void*),
    .output_increment = output_width_stride - (channels << log2_output_element_size),
    .ukernel = maxpool->ukernel,
  };
  memcpy(&op->context.max_pooling.params, params, params_size);

  // Parallelize over (batch index, output row).
  op->compute.type = xnn_parallelization_type_2d;
  op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_max_pooling;
  op->compute.range[0] = batch_size;
  op->compute.range[1] = out_rows;
  op->state = xnn_run_state_ready;

  return xnn_status_success;
}
325 
// Creates a max pooling operator for signed 8-bit NHWC tensors.
//
// Rejects an output clamping range whose minimum is not strictly below its
// maximum, initializes the s8 min/max parameters, and forwards everything to
// create_max_pooling2d_nhwc().
enum xnn_status xnn_create_max_pooling2d_nhwc_s8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    int8_t output_min,
    int8_t output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_max_pooling_nhwc_s8;

  if (output_max <= output_min) {
    xnn_log_error(
      "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(operator_type), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_s8_minmax_params minmax_params;
  xnn_params.s8.maxpool.init.s8(&minmax_params, output_min, output_max);

  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &minmax_params, sizeof(minmax_params), XNN_INIT_FLAG_S8,
    operator_type,
    max_pooling_op_out);
}
365 
// Creates a max pooling operator for unsigned 8-bit NHWC tensors.
//
// Rejects an output clamping range whose minimum is not strictly below its
// maximum, initializes the u8 min/max parameters, and forwards everything to
// create_max_pooling2d_nhwc().
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_max_pooling_nhwc_u8;

  if (output_max <= output_min) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(operator_type), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_u8_minmax_params minmax_params;
  xnn_params.u8.maxpool.init.u8(&minmax_params, output_min, output_max);

  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &minmax_params, sizeof(minmax_params), XNN_INIT_FLAG_U8,
    operator_type,
    max_pooling_op_out);
}
405 
// Creates a max pooling operator for 32-bit float NHWC tensors.
//
// Rejects NaN clamping bounds and a range whose lower bound is not strictly
// below its upper bound, initializes the f32 min/max parameters, and forwards
// everything to create_max_pooling2d_nhwc().
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_max_pooling_nhwc_f32;

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_invalid_parameter;
  }

  if (output_max <= output_min) {
    xnn_log_error(
      "failed to create %s with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(operator_type), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f32_minmax_params minmax_params;
  xnn_params.f32.maxpool.init.f32(&minmax_params, output_min, output_max);

  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &minmax_params, sizeof(minmax_params), XNN_INIT_FLAG_F32,
    operator_type,
    max_pooling_op_out);
}
459 
// Creates a max pooling operator for 16-bit float NHWC tensors.
//
// The clamping bounds arrive as 32-bit floats. They are rejected if NaN, then
// converted to half precision; the range check and its error message use the
// values converted back from half precision. The half-precision bit patterns
// are handed to the f16 parameter initializer when one is registered.
enum xnn_status xnn_create_max_pooling2d_nhwc_f16(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  const enum xnn_operator_type operator_type = xnn_operator_type_max_pooling_nhwc_f16;

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_invalid_parameter;
  }

  // Round-trip through half precision so the comparison sees the bounds as
  // they will be represented in the operator parameters.
  const uint16_t half_min = fp16_ieee_from_fp32_value(output_min);
  const uint16_t half_max = fp16_ieee_from_fp32_value(output_max);
  const float rounded_min = fp16_ieee_to_fp32_value(half_min);
  const float rounded_max = fp16_ieee_to_fp32_value(half_max);
  if (rounded_max <= rounded_min) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(operator_type), rounded_min, rounded_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_minmax_params minmax_params;
  if (xnn_params.f16.maxpool.init.f16 != NULL) {
    xnn_params.f16.maxpool.init.f16(&minmax_params, half_min, half_max);
  }

  return create_max_pooling2d_nhwc(
    input_padding_top, input_padding_right, input_padding_bottom, input_padding_left,
    pooling_height, pooling_width,
    stride_height, stride_width,
    dilation_height, dilation_width,
    channels, input_pixel_stride, output_pixel_stride,
    flags,
    &minmax_params, sizeof(minmax_params), XNN_INIT_FLAG_F16,
    operator_type,
    max_pooling_op_out);
}
519 
// Sets up a signed 8-bit max pooling operator for the given batch size, input
// shape and buffers. Fails with xnn_status_invalid_parameter if the operator
// was not created as an s8 max pooling operator; otherwise defers to
// setup_max_pooling2d_nhwc().
enum xnn_status xnn_setup_max_pooling2d_nhwc_s8(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  const enum xnn_operator_type expected_type = xnn_operator_type_max_pooling_nhwc_s8;
  if (max_pooling_op->type != expected_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_max_pooling2d_nhwc(
    max_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    0 /* log2(sizeof(input element)) = log2(sizeof(int8_t)) */,
    0 /* log2(sizeof(output element)) = log2(sizeof(int8_t)) */,
    &xnn_params.s8.maxpool,
    &max_pooling_op->params.s8_minmax, sizeof(max_pooling_op->params.s8_minmax),
    num_threads);
}
546 
// Sets up an unsigned 8-bit max pooling operator for the given batch size,
// input shape and buffers. Fails with xnn_status_invalid_parameter if the
// operator was not created as a u8 max pooling operator; otherwise defers to
// setup_max_pooling2d_nhwc().
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  const enum xnn_operator_type expected_type = xnn_operator_type_max_pooling_nhwc_u8;
  if (max_pooling_op->type != expected_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_max_pooling2d_nhwc(
    max_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    0 /* log2(sizeof(input element)) = log2(sizeof(uint8_t)) */,
    0 /* log2(sizeof(output element)) = log2(sizeof(uint8_t)) */,
    &xnn_params.u8.maxpool,
    &max_pooling_op->params.u8_minmax, sizeof(max_pooling_op->params.u8_minmax),
    num_threads);
}
573 
// Sets up a 16-bit float max pooling operator for the given batch size, input
// shape and buffers. Fails with xnn_status_invalid_parameter if the operator
// was not created as an f16 max pooling operator; otherwise defers to
// setup_max_pooling2d_nhwc() with 2-byte elements.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f16(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const enum xnn_operator_type expected_type = xnn_operator_type_max_pooling_nhwc_f16;
  if (max_pooling_op->type != expected_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_max_pooling2d_nhwc(
    max_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    1 /* log2(sizeof(input element)) = log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(output element)) = log2(sizeof(uint16_t)) */,
    &xnn_params.f16.maxpool,
    &max_pooling_op->params.f16_minmax, sizeof(max_pooling_op->params.f16_minmax),
    num_threads);
}
600 
// Sets up a 32-bit float max pooling operator for the given batch size, input
// shape and buffers. Fails with xnn_status_invalid_parameter if the operator
// was not created as an f32 max pooling operator; otherwise defers to
// setup_max_pooling2d_nhwc() with 4-byte elements.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const enum xnn_operator_type expected_type = xnn_operator_type_max_pooling_nhwc_f32;
  if (max_pooling_op->type != expected_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_type),
      xnn_operator_type_to_string(max_pooling_op->type));
    return xnn_status_invalid_parameter;
  }

  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_max_pooling2d_nhwc(
    max_pooling_op,
    batch_size, input_height, input_width,
    input, output,
    2 /* log2(sizeof(input element)) = log2(sizeof(float)) */,
    2 /* log2(sizeof(output element)) = log2(sizeof(float)) */,
    &xnn_params.f32.maxpool,
    &max_pooling_op->params.f32_minmax, sizeof(max_pooling_op->params.f32_minmax),
    num_threads);
}
627 
628