• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <assert.h>
10 #include <math.h>
11 #include <stdbool.h>
12 #include <stddef.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
17 #include <xnnpack.h>
18 #include <xnnpack/allocator.h>
19 #include <xnnpack/common.h>
20 #include <xnnpack/indirection.h>
21 #include <xnnpack/log.h>
22 #include <xnnpack/math.h>
23 #include <xnnpack/operator.h>
24 #include <xnnpack/params-init.h>
25 #include <xnnpack/params.h>
26 
27 
// Computes how many pooling-window positions fit along one spatial axis.
//
//   padded_input_dimension - input extent along the axis, padding included
//   kernel_dimension       - number of kernel taps along the axis
//   dilation_dimension     - distance between neighbouring kernel taps
//   stride_dimension       - distance between neighbouring window positions
//
// No range checks are performed. All arithmetic is unsigned, so a padded input
// smaller than the dilated kernel span wraps around instead of yielding zero,
// and stride_dimension must be non-zero.
static inline size_t compute_output_dimension(
    size_t padded_input_dimension,
    size_t kernel_dimension,
    size_t dilation_dimension,
    size_t stride_dimension)
{
  // Extent covered by the kernel once its taps are spread out by the dilation.
  const size_t dilated_kernel_span = 1 + dilation_dimension * (kernel_dimension - 1);
  // Offset of the last position where the dilated kernel still fits.
  const size_t last_window_offset = padded_input_dimension - dilated_kernel_span;
  return 1 + last_window_offset / stride_dimension;
}
37 
// Creates a Max Pooling operator for NHWC tensors of uint8_t elements.
//
// The arguments are validated in a fixed order; the first failing check logs
// an error and aborts creation. When all checks pass, a zero-initialized
// operator descriptor is allocated, filled with the pooling configuration and
// stored in *max_pooling_op_out.
//
// Returns:
//   xnn_status_success           - operator created and written to the output
//   xnn_status_uninitialized     - xnn_params.initialized is false
//   xnn_status_invalid_parameter - zero or 1x1 pooling size, zero stride,
//                                  zero dilation, zero channels, a pixel
//                                  stride below the channel count, or
//                                  output_min >= output_max
//   xnn_status_out_of_memory     - descriptor allocation failed
//
// On failure *max_pooling_op_out is left unmodified. The flags argument is
// not read by this function.
enum xnn_status xnn_create_max_pooling2d_nhwc_u8(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status result = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Max Pooling operator: XNNPACK is not initialized");
    goto fail;
  }

  // Every check from here up to the allocation reports an invalid parameter.
  result = xnn_status_invalid_parameter;

  const uint32_t pooling_elements = pooling_height * pooling_width;
  if (pooling_elements <= 1) {
    if (pooling_elements == 0) {
      xnn_log_error(
        "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
        "pooling size dimensions must be non-zero",
        pooling_width, pooling_height);
    } else {
      xnn_log_error(
        "failed to create Max Pooling operator with 1 pooling element: 1x1 pooling is meaningless");
    }
    goto fail;
  }

  if (stride_width == 0 || stride_height == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
      "stride dimensions must be non-zero",
      stride_width, stride_height);
    goto fail;
  }

  if (dilation_width == 0 || dilation_height == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " dilation: "
      "dilation dimensions must be non-zero",
      dilation_width, dilation_height);
    goto fail;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %zu channels: number of channels must be non-zero",
      channels);
    goto fail;
  }

  if (channels > input_pixel_stride) {
    xnn_log_error(
      "failed to create Max Pooling operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    goto fail;
  }

  if (channels > output_pixel_stride) {
    xnn_log_error(
      "failed to create Max Pooling operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    goto fail;
  }

  if (!(output_min < output_max)) {
    xnn_log_error(
      "failed to create Max Pooling operator with [%" PRIu8 ", %" PRIu8 "] output range: "
      "range min must be below range max",
      output_min, output_max);
    goto fail;
  }

  // The only remaining failure mode is the descriptor allocation.
  result = xnn_status_out_of_memory;

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Max Pooling operator descriptor", sizeof(struct xnn_operator));
    goto fail;
  }

  // Operator identity and run state.
  op->type = xnn_operator_type_max_pooling_nhwc_u8;
  op->ukernel.type = xnn_ukernel_type_max_pooling;
  op->state = xnn_run_state_invalid;

  // Pooling window geometry.
  op->kernel_height = pooling_height;
  op->kernel_width = pooling_width;
  op->stride_height = stride_height;
  op->stride_width = stride_width;
  op->dilation_height = dilation_height;
  op->dilation_width = dilation_width;

  // Implicit padding around the input.
  op->padding_top = input_padding_top;
  op->padding_right = input_padding_right;
  op->padding_bottom = input_padding_bottom;
  op->padding_left = input_padding_left;

  // Tensor layout.
  op->channels = channels;
  op->input_pixel_stride = input_pixel_stride;
  op->output_pixel_stride = output_pixel_stride;

  // Output clamping parameters handed to the micro-kernel at setup time.
  op->u8_output_params = xnn_init_u8_output_params(output_min, output_max);

  *max_pooling_op_out = op;
  return xnn_status_success;

fail:
  // op is still NULL unless the allocation succeeded.
  xnn_delete_operator(op);
  return result;
}
166 
// Creates a Max Pooling operator for NHWC tensors of float elements.
//
// The arguments are validated in a fixed order; the first failing check logs
// an error and aborts creation through the shared error path. When all checks
// pass, a zero-initialized operator descriptor is allocated, filled with the
// pooling configuration and stored in *max_pooling_op_out.
//
// Returns:
//   xnn_status_success           - operator created and written to the output
//   xnn_status_uninitialized     - xnn_params.initialized is false
//   xnn_status_invalid_parameter - zero or 1x1 pooling size, zero stride,
//                                  zero dilation, zero channels, a pixel
//                                  stride below the channel count, a NaN
//                                  output bound, or output_min >= output_max
//   xnn_status_out_of_memory     - descriptor allocation failed
//
// On failure *max_pooling_op_out is left unmodified. The flags argument is
// not read by this function.
enum xnn_status xnn_create_max_pooling2d_nhwc_f32(
    uint32_t input_padding_top,
    uint32_t input_padding_right,
    uint32_t input_padding_bottom,
    uint32_t input_padding_left,
    uint32_t pooling_height,
    uint32_t pooling_width,
    uint32_t stride_height,
    uint32_t stride_width,
    uint32_t dilation_height,
    uint32_t dilation_width,
    size_t channels,
    size_t input_pixel_stride,
    size_t output_pixel_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* max_pooling_op_out)
{
  xnn_operator_t max_pooling_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    // This is the create path, so report a create failure and leave through
    // the same error label as every other check in this function.
    xnn_log_error("failed to create Max Pooling operator: XNNPACK is not initialized");
    goto error;
  }

  // Every check from here up to the allocation reports an invalid parameter.
  status = xnn_status_invalid_parameter;

  const uint32_t pooling_size = pooling_height * pooling_width;
  if (pooling_size == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " pooling size: "
      "pooling size dimensions must be non-zero",
      pooling_width, pooling_height);
    goto error;
  }

  if (pooling_size == 1) {
    xnn_log_error(
      "failed to create Max Pooling operator with 1 pooling element: "
      "1x1 pooling is meaningless");
    goto error;
  }

  if (stride_height == 0 || stride_width == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " stride: "
      "stride dimensions must be non-zero",
      stride_width, stride_height);
    goto error;
  }

  if (dilation_height == 0 || dilation_width == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %" PRIu32 "x%" PRIu32 " dilation: "
      "dilation dimensions must be non-zero",
      dilation_width, dilation_height);
    goto error;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create Max Pooling operator with %zu channels: number of channels must be non-zero",
      channels);
    goto error;
  }

  if (input_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Max Pooling operator with input pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_pixel_stride, channels);
    goto error;
  }

  if (output_pixel_stride < channels) {
    xnn_log_error(
      "failed to create Max Pooling operator with output pixel stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_pixel_stride, channels);
    goto error;
  }

  // NaN bounds are rejected explicitly: every ordered comparison involving a
  // NaN is false, so the range check below would not catch them.
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create Max Pooling with NaN output lower bound: lower bound must be non-NaN");
    goto error;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create Max Pooling with NaN output upper bound: upper bound must be non-NaN");
    goto error;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create Max Pooling with [%.7g, %.7g] output range: lower bound must be below upper bound",
      output_min, output_max);
    goto error;
  }

  // The only remaining failure mode is the descriptor allocation.
  status = xnn_status_out_of_memory;

  max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (max_pooling_op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Max Pooling operator descriptor", sizeof(struct xnn_operator));
    goto error;
  }

  max_pooling_op->padding_top = input_padding_top;
  max_pooling_op->padding_right = input_padding_right;
  max_pooling_op->padding_bottom = input_padding_bottom;
  max_pooling_op->padding_left = input_padding_left;

  max_pooling_op->kernel_height = pooling_height;
  max_pooling_op->kernel_width = pooling_width;
  max_pooling_op->stride_height = stride_height;
  max_pooling_op->stride_width = stride_width;
  max_pooling_op->dilation_height = dilation_height;
  max_pooling_op->dilation_width = dilation_width;
  max_pooling_op->channels = channels;
  max_pooling_op->input_pixel_stride = input_pixel_stride;
  max_pooling_op->output_pixel_stride = output_pixel_stride;

  // Output clamping parameters handed to the micro-kernel at setup time.
  max_pooling_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);

  max_pooling_op->type = xnn_operator_type_max_pooling_nhwc_f32;
  max_pooling_op->ukernel.type = xnn_ukernel_type_max_pooling;

  max_pooling_op->state = xnn_run_state_invalid;

  *max_pooling_op_out = max_pooling_op;
  return xnn_status_success;

error:
  // max_pooling_op is still NULL unless the allocation succeeded.
  xnn_delete_operator(max_pooling_op);
  return status;
}
307 
setup_max_pooling2d(xnn_operator_t max_pooling_op,size_t batch_size,size_t input_height,size_t input_width,const void * input,void * output,uint32_t log2_input_element_size,uint32_t log2_output_element_size,struct maxpool_parameters maxpool[restrict static1],const void * params,size_t num_threads)308 static enum xnn_status setup_max_pooling2d(
309   xnn_operator_t max_pooling_op,
310   size_t batch_size,
311   size_t input_height,
312   size_t input_width,
313   const void* input,
314   void* output,
315   uint32_t log2_input_element_size,
316   uint32_t log2_output_element_size,
317   struct maxpool_parameters maxpool[restrict static 1],
318   const void* params,
319   size_t num_threads)
320 {
321   max_pooling_op->state = xnn_run_state_invalid;
322 
323   if (!xnn_params.initialized) {
324     xnn_log_error(
325       "failed to setup Max Pooling operator: XNNPACK is not initialized");
326     return xnn_status_uninitialized;
327   }
328 
329   if (input_width == 0 || input_height == 0) {
330     xnn_log_error(
331       "failed to setup Max Pooling operator with %zux%zu input: input dimensions must be non-zero",
332       input_width, input_height);
333     return xnn_status_invalid_parameter;
334   }
335 
336   if (batch_size == 0) {
337     max_pooling_op->state = xnn_run_state_skip;
338     return xnn_status_success;
339   }
340 
341   max_pooling_op->input_height = input_height;
342   max_pooling_op->input_width = input_width;
343   max_pooling_op->input = input;
344 
345   max_pooling_op->output_height = compute_output_dimension(
346       max_pooling_op->padding_top + input_height + max_pooling_op->padding_bottom,
347       max_pooling_op->kernel_height,
348       max_pooling_op->dilation_height,
349       max_pooling_op->stride_height);
350   max_pooling_op->output_width = compute_output_dimension(
351       max_pooling_op->padding_left + input_width + max_pooling_op->padding_right,
352       max_pooling_op->kernel_width,
353       max_pooling_op->dilation_width,
354       max_pooling_op->stride_width);
355 
356   const size_t pooling_height = max_pooling_op->kernel_height;
357   const size_t pooling_width = max_pooling_op->kernel_width;
358   const size_t pooling_size = pooling_height * pooling_width;
359   const size_t output_height = max_pooling_op->output_height;
360   const size_t output_width = max_pooling_op->output_width;
361   const uint32_t mr = maxpool->mr;
362 
363   const size_t step_width =
364     max_pooling_op->dilation_width > 1 ? pooling_width : min(max_pooling_op->stride_width, pooling_width);
365   const size_t step_height = pooling_size + (output_width - 1) * step_width * pooling_height;
366 
367   if (input_height != max_pooling_op->last_input_height ||
368       input_width != max_pooling_op->last_input_width)
369   {
370     // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
371     const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + output_height * step_height);
372     const void** indirection_buffer = (const void**) xnn_reallocate_memory(max_pooling_op->indirection_buffer, indirection_buffer_size);
373     if (indirection_buffer == NULL) {
374       xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
375       return xnn_status_out_of_memory;
376     }
377     max_pooling_op->indirection_buffer = indirection_buffer;
378 
379     xnn_indirection_init_maxpool2d(max_pooling_op, step_height, step_width, log2_input_element_size);
380 
381     max_pooling_op->last_input = input;
382     max_pooling_op->last_input_height = input_height;
383     max_pooling_op->last_input_width = input_width;
384   }
385 
386   const uint32_t qr = maxpool->qr;
387   const size_t channels = max_pooling_op->channels;
388 
389   const size_t indirect_input_height_stride = step_height * sizeof(void*);
390   const size_t output_width_stride = max_pooling_op->output_pixel_stride << log2_output_element_size;
391   const size_t output_height_stride = output_width * output_width_stride;
392   const size_t multipass_adjustment = round_up(doz(pooling_size, mr), qr) + mr;
393 
394   max_pooling_op->context.max_pooling = (struct max_pooling_context) {
395     .indirect_input = max_pooling_op->indirection_buffer,
396     .indirect_input_height_stride = indirect_input_height_stride,
397     .input_offset = (size_t) ((uintptr_t) input - (uintptr_t) max_pooling_op->last_input),
398     .input_batch_stride = (input_height * input_width * max_pooling_op->input_pixel_stride) << log2_input_element_size,
399     .output = output,
400     .output_batch_stride = output_height * output_height_stride,
401     .output_height_stride = output_height_stride,
402     .output_width = output_width,
403     .pooling_size = pooling_size,
404     .channels = channels,
405     .input_increment = (pooling_height * step_width - multipass_adjustment) * sizeof(void*),
406     .output_increment = output_width_stride - (channels << log2_output_element_size),
407     .ukernel = maxpool->ukernel,
408   };
409   memcpy(&max_pooling_op->context.max_pooling.params, params, sizeof(max_pooling_op->context.max_pooling.params));
410 
411   max_pooling_op->compute.type = xnn_parallelization_type_2d;
412   max_pooling_op->compute.task_2d = (pthreadpool_task_2d_t) xnn_compute_max_pooling;
413   max_pooling_op->compute.range[0] = batch_size;
414   max_pooling_op->compute.range[1] = output_height;
415   max_pooling_op->state = xnn_run_state_ready;
416 
417   return xnn_status_success;
418 }
419 
// Sets up a uint8_t NHWC Max Pooling operator for the given batch size, input
// dimensions and input/output pointers.
//
// Returns xnn_status_invalid_parameter (after logging) when the operator was
// not created as xnn_operator_type_max_pooling_nhwc_u8; otherwise forwards to
// setup_max_pooling2d with the u8 micro-kernel parameters and returns its
// status.
enum xnn_status xnn_setup_max_pooling2d_nhwc_u8(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (max_pooling_op->type == xnn_operator_type_max_pooling_nhwc_u8) {
    // log2(sizeof(uint8_t)) for both the input and the output elements.
    const uint32_t log2_input_element_size = 0;
    const uint32_t log2_output_element_size = 0;
    const size_t num_threads = pthreadpool_get_threads_count(threadpool);

    return setup_max_pooling2d(
      max_pooling_op,
      batch_size, input_height, input_width,
      input, output,
      log2_input_element_size,
      log2_output_element_size,
      &xnn_params.u8.maxpool,
      &max_pooling_op->u8_output_params,
      num_threads);
  }

  xnn_log_error("failed to setup Max Pooling (NHWC, U8) operator: operator type mismatch");
  return xnn_status_invalid_parameter;
}
444 
// Sets up a float NHWC Max Pooling operator for the given batch size, input
// dimensions and input/output pointers.
//
// Returns xnn_status_invalid_parameter (after logging) when the operator was
// not created as xnn_operator_type_max_pooling_nhwc_f32; otherwise forwards to
// setup_max_pooling2d with the f32 micro-kernel parameters and returns its
// status.
enum xnn_status xnn_setup_max_pooling2d_nhwc_f32(
    xnn_operator_t max_pooling_op,
    size_t batch_size,
    size_t input_height,
    size_t input_width,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (max_pooling_op->type == xnn_operator_type_max_pooling_nhwc_f32) {
    // log2(sizeof(float)) for both the input and the output elements.
    const uint32_t log2_input_element_size = 2;
    const uint32_t log2_output_element_size = 2;
    const size_t num_threads = pthreadpool_get_threads_count(threadpool);

    return setup_max_pooling2d(
      max_pooling_op,
      batch_size, input_height, input_width,
      input, output,
      log2_input_element_size,
      log2_output_element_size,
      &xnn_params.f32.maxpool,
      &max_pooling_op->f32_output_params,
      num_threads);
  }

  xnn_log_error("failed to setup Max Pooling (NHWC, F32) operator: operator type mismatch");
  return xnn_status_invalid_parameter;
}
469 
470