1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16
17 #include <xnnpack.h>
18 #include <xnnpack/allocator.h>
19 #include <xnnpack/common.h>
20 #include <xnnpack/indirection.h>
21 #include <xnnpack/log.h>
22 #include <xnnpack/math.h>
23 #include <xnnpack/operator.h>
24 #include <xnnpack/params-init.h>
25 #include <xnnpack/params.h>
26
27
// Computes the number of output positions along one spatial dimension.
//
//   padded_input_dimension - input extent plus the padding on both sides.
//   kernel_dimension       - pooling window extent along this dimension.
//   dilation_dimension     - spacing between neighbouring window taps.
//   stride_dimension       - step between consecutive windows; it is used as a
//                            divisor, so it must be non-zero.
//
// A dilated window covers (kernel_dimension - 1) * dilation_dimension + 1
// input elements. When that span is larger than the padded input, the
// difference saturates at zero instead of wrapping around in unsigned
// arithmetic, so the function returns 1 rather than a value derived from a
// wrapped size_t.
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t kernel_dimension,
    size_t dilation_dimension,
    size_t stride_dimension)
{
  const size_t effective_kernel_dimension = (kernel_dimension - 1) * dilation_dimension + 1;
  // Saturating subtraction: number of elements left over after placing the first window.
  const size_t remaining_dimension =
    padded_input_dimension > effective_kernel_dimension
      ? padded_input_dimension - effective_kernel_dimension
      : 0;
  return remaining_dimension / stride_dimension + 1;
}
37
// Creates a Max Pooling operator for NHWC tensors with uint8_t elements.
//
// The arguments are validated in a fixed order; the first failing check logs
// an error and leaves through the shared `error` label with:
//   - xnn_status_uninitialized     when xnn_params.initialized is false;
//   - xnn_status_invalid_parameter when the pooling area is 0 or 1 elements,
//     a stride or dilation is zero, channels is zero, a pixel stride is
//     smaller than channels, or output_min is not below output_max;
//   - xnn_status_out_of_memory     when the operator descriptor cannot be
//     allocated.
// On success the new operator is stored in *max_pooling_op_out and
// xnn_status_success is returned. The `flags` argument is not read here.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  enum xnn_status status = xnn_status_uninitialized;
  xnn_operator_t op = NULL;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Max Pooling operator: XNNPACK is not initialized");
    goto error;
  }

  // Every argument check below fails with the same status code.
  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      pooling_width, pooling_height);
    goto error;
  }
  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create Max Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
    goto error;
  }

  if (stride_width == 0 || stride_height == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
      "stride dimensions must be non-zero",
      stride_width, stride_height);
    goto error;
  }

  if (dilation_width == 0 || dilation_height == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " dilation: "
      "dilation dimensions must be non-zero",
      dilation_width, dilation_height);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %zu channels: number of channels must be non-zero",
      channels);
    goto error;
  }

  // Pixel strides are compared against the channel count, so each must be at least `channels`.
  if (channels > input_pixel_stride) {
    xnn_log_error(
      "failed to create Max Pooling operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    goto error;
  }
  if (channels > output_pixel_stride) {
    xnn_log_error(
      "failed to create Max Pooling operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    goto error;
  }

  if (!(output_min < output_max)) {
    xnn_log_error(
      "failed to create Max Pooling operator with [%" PRIu8 ", %" PRIu8 "] output range: "
      "range min must be below range max",
      output_min, output_max);
    goto error;
  }

  // From here on the only failure mode is a failed allocation.
  status = xnn_status_out_of_memory;

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Max Pooling operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  // Operator identity and initial run state.
  op->type = xnn_operator_type_max_pooling_nhwc_u8;
  op->ukernel.type = xnn_ukernel_type_max_pooling;
  op->state = xnn_run_state_invalid;

  // Pooling geometry.
  op->kernel_height = pooling_height;
  op->kernel_width = pooling_width;
  op->stride_height = stride_height;
  op->stride_width = stride_width;
  op->dilation_height = dilation_height;
  op->dilation_width = dilation_width;

  // Implicit input padding.
  op->padding_top = input_padding_top;
  op->padding_right = input_padding_right;
  op->padding_bottom = input_padding_bottom;
  op->padding_left = input_padding_left;

  // Memory layout of input and output pixels.
  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;

  // Output clamping parameters.
  op->u8_output_params = xnn_init_u8_output_params(output_min, output_max);

  *max_pooling_op_out = op;
  return xnn_status_success;

error:
  xnn_delete_operator(op);
  return status;
}
166
// Creates a Max Pooling operator for NHWC tensors with float elements.
//
// The arguments are validated in a fixed order; the first failing check logs
// an error and leaves through the shared `error` label with:
//   - xnn_status_uninitialized     when xnn_params.initialized is false;
//   - xnn_status_invalid_parameter when the pooling area is 0 or 1 elements,
//     a stride or dilation is zero, channels is zero, a pixel stride is
//     smaller than channels, either output bound is NaN, or output_min is not
//     below output_max;
//   - xnn_status_out_of_memory     when the operator descriptor cannot be
//     allocated.
// On success the new operator is stored in *max_pooling_op_out and
// xnn_status_success is returned. The `flags` argument is not read here.
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  xnn_operator_t max_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    // Report this as a creation failure and take the same exit path as every
    // other failure in this function (and as the U8 variant does).
    xnn_log_error("failed to create Max Pooling operator: XNNPACK is not initialized");
    goto error;
  }

  // Every argument check below fails with the same status code.
  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create Max Pooling operator with 1 pooling element: "
      "1x1 pooling is meaningless");
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
      "stride dimensions must be non-zero",
      stride_width, stride_height);
    goto error;
  }

  if (dilation_height == 0 || dilation_width == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " dilation: "
      "dilation dimensions must be non-zero",
      dilation_width, dilation_height);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %zu channels: number of channels must be non-zero",
      channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Max Pooling operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Max Pooling operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    goto error;
  }

  // NaN bounds are rejected explicitly: the range comparison further down is
  // false whenever either operand is NaN and would let them through.
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create Max Pooling with NaN output lower bound: lower bound must be non-NaN");
    goto error;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create Max Pooling with NaN output upper bound: upper bound must be non-NaN");
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Max Pooling with [%.7g, %.7g] output range: lower bound must be below upper bound",
      output_min, output_max);
    goto error;
  }

  // From here on the only failure mode is a failed allocation.
  status = xnn_status_out_of_memory;

  max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (max_pooling_op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Max Pooling operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  // Implicit input padding.
  max_pooling_op->padding_top = input_padding_top;
  max_pooling_op->padding_right = input_padding_right;
  max_pooling_op->padding_bottom = input_padding_bottom;
  max_pooling_op->padding_left = input_padding_left;

  // Pooling geometry and pixel layout.
  max_pooling_op->kernel_height = pooling_height;
  max_pooling_op->kernel_width = pooling_width;
  max_pooling_op->stride_height = stride_height;
  max_pooling_op->stride_width = stride_width;
  max_pooling_op->dilation_height = dilation_height;
  max_pooling_op->dilation_width = dilation_width;
  max_pooling_op->channels = channels;
  max_pooling_op->input_pixel_stride = input_pixel_stride;
  max_pooling_op->output_pixel_stride = output_pixel_stride;

  // Output clamping parameters.
  max_pooling_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);

  max_pooling_op->type = xnn_operator_type_max_pooling_nhwc_f32;
  max_pooling_op->ukernel.type = xnn_ukernel_type_max_pooling;

  max_pooling_op->state = xnn_run_state_invalid;

  *max_pooling_op_out = max_pooling_op;
  return xnn_status_success;

error:
  xnn_delete_operator(max_pooling_op);
  return status;
}
307
setup_max_pooling2d(xnn_operator_t max_pooling_op,size_t batch_size,size_t input_height,size_t input_width,const void * input,void * output,uint32_t log2_input_element_size,uint32_t log2_output_element_size,struct maxpool_parameters maxpool[restrict static1],const void * params,size_t num_threads)308 static enum xnn_status setup_max_pooling2d(
309 xnn_operator_t max_pooling_op,
310 size_t batch_size,
311 size_t input_height,
312 size_t input_width,
313 const void* input,
314 void* output,
315 uint32_t log2_input_element_size,
316 uint32_t log2_output_element_size,
317 struct maxpool_parameters maxpool[restrict static 1],
318 const void* params,
319 size_t num_threads)
320 {
321 max_pooling_op->state = xnn_run_state_invalid;
322
323 if (!xnn_params.initialized) {
324 xnn_log_error(
325 "failed to setup Max Pooling operator: XNNPACK is not initialized");
326 return xnn_status_uninitialized;
327 }
328
329 if (input_width == 0 || input_height == 0) {
330 xnn_log_error(
331 "failed to setup Max Pooling operator with %zux%zu input: input dimensions must be non-zero",
332 input_width, input_height);
333 return xnn_status_invalid_parameter;
334 }
335
336 if (batch_size == 0) {
337 max_pooling_op->state = xnn_run_state_skip;
338 return xnn_status_success;
339 }
340
341 max_pooling_op->input_height = input_height;
342 max_pooling_op->input_width = input_width;
343 max_pooling_op->input = input;
344
345 max_pooling_op->output_height = compute_output_dimension(
346 max_pooling_op->padding_top + input_height + max_pooling_op->padding_bottom,
347 max_pooling_op->kernel_height,
348 max_pooling_op->dilation_height,
349 max_pooling_op->stride_height);
350 max_pooling_op->output_width = compute_output_dimension(
351 max_pooling_op->padding_left + input_width + max_pooling_op->padding_right,
352 max_pooling_op->kernel_width,
353 max_pooling_op->dilation_width,
354 max_pooling_op->stride_width);
355
356 const size_t pooling_height = max_pooling_op->kernel_height;
357 const size_t pooling_width = max_pooling_op->kernel_width;
358 const size_t pooling_size = pooling_height * pooling_width;
359 const size_t output_height = max_pooling_op->output_height;
360 const size_t output_width = max_pooling_op->output_width;
361 const uint32_t mr = maxpool->mr;
362
363 const size_t step_width =
364 max_pooling_op->dilation_width > 1 ? pooling_width : min(max_pooling_op->stride_width, pooling_width);
365 const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;
366
367 if (input_height != max_pooling_op->last_input_height ||
368 input_width != max_pooling_op->last_input_width)
369 {
370 // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
371 const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + output_height * step_height);
372 const void** indirection_buffer = (const void**) xnn_reallocate_memory(max_pooling_op->indirection_buffer, indirection_buffer_size);
373 if (indirection_buffer == NULL) {
374 xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
375 return xnn_status_out_of_memory;
376 }
377 max_pooling_op->indirection_buffer = indirection_buffer;
378
379 xnn_indirection_init_maxpool2d(max_pooling_op, step_height, step_width, log2_input_element_size);
380
381 max_pooling_op->last_input = input;
382 max_pooling_op->last_input_height = input_height;
383 max_pooling_op->last_input_width = input_width;
384 }
385
386 const uint32_t qr = maxpool->qr;
387 const size_t channels = max_pooling_op->channels;
388
389 const size_t indirect_input_height_stride = step_height * sizeof(void*);
390 const size_t output_width_stride = max_pooling_op->output_pixel_stride << log2_output_element_size;
391 const size_t output_height_stride = output_width * output_width_stride;
392 const size_t multipass_adjustment = round_up(doz(pooling_size, mr), qr) + mr;
393
394 max_pooling_op->context.max_pooling = (struct max_pooling_context) {
395 .indirect_input = max_pooling_op->indirection_buffer,
396 .indirect_input_height_stride = indirect_input_height_stride,
397 .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) max_pooling_op->last_input),
398 .input_batch_stride = (input_height * input_width * max_pooling_op->input_pixel_stride) << log2_input_element_size,
399 .output = output,
400 .output_batch_stride = output_height * output_height_stride,
401 .output_height_stride = output_height_stride,
402 .output_width = output_width,
403 .pooling_size = pooling_size,
404 .channels = channels,
405 .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
406 .output_increment = output_width_stride - (channels << log2_output_element_size),
407 .ukernel = maxpool->ukernel,
408 };
409 memcpy(&max_pooling_op->context.max_pooling.params, params, sizeof(max_pooling_op->context.max_pooling.params));
410
411 max_pooling_op->compute.type = xnn_parallelization_type_2d;
412 max_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_max_pooling;
413 max_pooling_op->compute.range[0] = batch_size;
414 max_pooling_op->compute.range[1] = output_height;
415 max_pooling_op->state = xnn_run_state_ready;
416
417 return xnn_status_success;
418 }
419
// Sets up a uint8_t NHWC Max Pooling operator for a given batch size, input
// shape and input/output buffers.
//
// Logs an error and returns xnn_status_invalid_parameter when the operator's
// type is not xnn_operator_type_max_pooling_nhwc_u8. Otherwise forwards to
// setup_max_pooling2d with the U8 micro-kernel descriptor from xnn_params,
// the operator's U8 output params and the thread count of `threadpool`, and
// returns that call's status.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (max_pooling_op->type == xnn_operator_type_max_pooling_nhwc_u8) {
    // log2(sizeof(uint8_t)) for both the input and the output elements.
    const uint32_t log2_element_size = 0;
    const size_t num_threads = pthreadpool_get_threads_count(threadpool);

    return setup_max_pooling2d(
      max_pooling_op,
      batch_size, input_height, input_width,
      input, output,
      log2_element_size,
      log2_element_size,
      &xnn_params.u8.maxpool,
      &max_pooling_op->u8_output_params,
      num_threads);
  }

  xnn_log_error("failed to setup Max Pooling (NHWC, U8) operator: operator type mismatch");
  return xnn_status_invalid_parameter;
}
444
// Sets up a float NHWC Max Pooling operator for a given batch size, input
// shape and input/output buffers.
//
// Logs an error and returns xnn_status_invalid_parameter when the operator's
// type is not xnn_operator_type_max_pooling_nhwc_f32. Otherwise forwards to
// setup_max_pooling2d with the F32 micro-kernel descriptor from xnn_params,
// the operator's F32 output params and the thread count of `threadpool`, and
// returns that call's status.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (max_pooling_op->type == xnn_operator_type_max_pooling_nhwc_f32) {
    // log2(sizeof(float)) for both the input and the output elements.
    const uint32_t log2_element_size = 2;
    const size_t num_threads = pthreadpool_get_threads_count(threadpool);

    return setup_max_pooling2d(
      max_pooling_op,
      batch_size, input_height, input_width,
      input, output,
      log2_element_size,
      log2_element_size,
      &xnn_params.f32.maxpool,
      &max_pooling_op->f32_output_params,
      num_threads);
  }

  xnn_log_error("failed to setup Max Pooling (NHWC, F32) operator: operator type mismatch");
  return xnn_status_invalid_parameter;
}
469
470