• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <assert.h>
10 #include <math.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 
15 #include <xnnpack.h>
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/log.h>
18 #include <xnnpack/operator.h>
19 #include <xnnpack/params-init.h>
20 #include <xnnpack/params.h>
21 
22 
xnn_create_global_average_pooling_nwc_q8(size_t channels,size_t input_stride,size_t output_stride,uint8_t input_zero_point,float input_scale,uint8_t output_zero_point,float output_scale,uint8_t output_min,uint8_t output_max,uint32_t flags,xnn_operator_t * global_average_pooling_op_out)23 enum xnn_status xnn_create_global_average_pooling_nwc_q8(
24     size_t channels,
25     size_t input_stride,
26     size_t output_stride,
27     uint8_t input_zero_point,
28     float input_scale,
29     uint8_t output_zero_point,
30     float output_scale,
31     uint8_t output_min,
32     uint8_t output_max,
33     uint32_t flags,
34     xnn_operator_t* global_average_pooling_op_out)
35 {
36   xnn_operator_t global_average_pooling_op = NULL;
37   enum xnn_status status = xnn_status_uninitialized;
38 
39   if (!xnn_params.initialized) {
40     xnn_log_error("failed to create Global Average Pooling operator: XNNPACK is not initialized");
41     goto error;
42   }
43 
44   status = xnn_status_invalid_parameter;
45 
46   if (channels == 0) {
47     xnn_log_error(
48       "failed to create Global Average Pooling operator with %zu channels: number of channels must be non-zero",
49       channels);
50     goto error;
51   }
52 
53   if (input_stride < channels) {
54     xnn_log_error(
55       "failed to create Global Average Pooling operator with input element stride of %zu: "
56       "stride must be at least as large as the number of channels (%zu)",
57       input_stride, channels);
58     goto error;
59   }
60 
61   if (output_stride < channels) {
62     xnn_log_error(
63       "failed to create Global Average Pooling operator with output element stride of %zu: "
64       "stride must be at least as large as the number of channels (%zu)",
65       output_stride, channels);
66     goto error;
67   }
68 
69   if (input_scale <= 0.0f || !isnormal(input_scale)) {
70     xnn_log_error(
71       "failed to create Global Average Pooling operator with %.7g input scale: "
72       "scale must be finite, normalized, and positive",
73       input_scale);
74     goto error;
75   }
76 
77   if (output_scale <= 0.0f || !isnormal(output_scale)) {
78     xnn_log_error(
79       "failed to create Global Average Pooling operator with %.7g output scale: "
80       "scale must be finite, normalized, and positive",
81       output_scale);
82     goto error;
83   }
84 
85   if (output_min >= output_max) {
86     xnn_log_error(
87       "failed to create Global Average Pooling operator with [%" PRIu8 ", %" PRIu8 "] output range: "
88       "range min must be below range max",
89       output_min, output_max);
90     goto error;
91   }
92 
93   status = xnn_status_unsupported_parameter;
94 
95   const float input_output_scale = input_scale / output_scale;
96   if (input_output_scale < 0x1.0p-8f || input_output_scale >= 0x1.0p+8f) {
97     xnn_log_error(
98       "failed to create Global Average Pooling operator with %.7g input-to-output scale ratio: "
99       "scale ratio must be in [2**-8, 2**8) range",
100       input_output_scale);
101     goto error;
102   }
103 
104   status = xnn_status_out_of_memory;
105 
106   global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
107   if (global_average_pooling_op == NULL) {
108     xnn_log_error("failed to allocate %zu bytes for Global Average Pooling operator descriptor", sizeof(struct xnn_operator));
109     goto error;
110   }
111 
112   void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
113   if (zero_buffer == NULL) {
114     xnn_log_error("failed to allocate %zu bytes for Global Average Pooling zero padding",
115       channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
116     goto error;
117   }
118   global_average_pooling_op->zero_buffer = zero_buffer;
119 
120   global_average_pooling_op->channels = channels;
121   global_average_pooling_op->input_pixel_stride = input_stride;
122   global_average_pooling_op->output_pixel_stride = output_stride;
123   global_average_pooling_op->input_zero_point = input_zero_point;
124   global_average_pooling_op->output_zero_point = output_zero_point;
125   global_average_pooling_op->input_scale = input_scale;
126   global_average_pooling_op->output_scale = output_scale;
127   global_average_pooling_op->output_min = output_min;
128   global_average_pooling_op->output_max = output_max;
129 
130   global_average_pooling_op->type = xnn_operator_type_global_average_pooling_nwc_q8;
131   global_average_pooling_op->ukernel.type = xnn_ukernel_type_global_average_pooling;
132 
133   global_average_pooling_op->state = xnn_run_state_invalid;
134 
135   *global_average_pooling_op_out = global_average_pooling_op;
136   return xnn_status_success;
137 
138 error:
139   xnn_delete_operator(global_average_pooling_op);
140   return status;
141 }
142 
xnn_create_global_average_pooling_nwc_f32(size_t channels,size_t input_stride,size_t output_stride,float output_min,float output_max,uint32_t flags,xnn_operator_t * global_average_pooling_op_out)143 enum xnn_status xnn_create_global_average_pooling_nwc_f32(
144     size_t channels,
145     size_t input_stride,
146     size_t output_stride,
147     float output_min,
148     float output_max,
149     uint32_t flags,
150     xnn_operator_t* global_average_pooling_op_out)
151 {
152   xnn_operator_t global_average_pooling_op = NULL;
153   enum xnn_status status = xnn_status_uninitialized;
154 
155   if (!xnn_params.initialized) {
156     xnn_log_error("failed to create Global Average Pooling operator: XNNPACK is not initialized");
157     goto error;
158   }
159 
160   status = xnn_status_invalid_parameter;
161 
162   if (channels == 0) {
163     xnn_log_error(
164       "failed to create Global Average Pooling operator with %zu channels: number of channels must be non-zero",
165       channels);
166     goto error;
167   }
168 
169   if (input_stride < channels) {
170     xnn_log_error(
171       "failed to create Global Average Pooling operator with input element stride of %zu: "
172       "stride must be at least as large as the number of channels (%zu)",
173       input_stride, channels);
174     goto error;
175   }
176 
177   if (output_stride < channels) {
178     xnn_log_error(
179       "failed to create Global Average Pooling operator with output element stride of %zu: "
180       "stride must be at least as large as the number of channels (%zu)",
181       output_stride, channels);
182     goto error;
183   }
184 
185   if (isnan(output_min)) {
186     xnn_log_error(
187       "failed to create Global Average Pooling operator with NaN output lower bound: lower bound must be non-NaN");
188     goto error;
189   }
190 
191   if (isnan(output_max)) {
192     xnn_log_error(
193       "failed to create Global Average Pooling operator with NaN output upper bound: upper bound must be non-NaN");
194     goto error;
195   }
196 
197   if (output_min >= output_max) {
198     xnn_log_error(
199       "failed to create Global Average Pooling operator with [%.7g, %.7g] output range: "
200       "lower bound must be below upper bound",
201       output_min, output_max);
202     goto error;
203   }
204 
205   status = xnn_status_out_of_memory;
206 
207   global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
208   if (global_average_pooling_op == NULL) {
209     xnn_log_error("failed to allocate %zu bytes for Global Average Pooling operator descriptor", sizeof(struct xnn_operator));
210     goto error;
211   }
212 
213   void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
214   if (zero_buffer == NULL) {
215     xnn_log_error("failed to allocate %zu bytes for Global Average Pooling zero padding",
216       channels * sizeof(float) + XNN_EXTRA_BYTES);
217     goto error;
218   }
219   global_average_pooling_op->zero_buffer = zero_buffer;
220 
221   global_average_pooling_op->channels = channels;
222   global_average_pooling_op->input_pixel_stride = input_stride;
223   global_average_pooling_op->output_pixel_stride = output_stride;
224   global_average_pooling_op->f32_avgpool_params = xnn_init_f32_avgpool_params(nanf(""), output_min, output_max);
225 
226   global_average_pooling_op->type = xnn_operator_type_global_average_pooling_nwc_f32;
227   global_average_pooling_op->ukernel.type = xnn_ukernel_type_global_average_pooling;
228 
229   global_average_pooling_op->state = xnn_run_state_invalid;
230 
231   *global_average_pooling_op_out = global_average_pooling_op;
232   return xnn_status_success;
233 
234 error:
235   xnn_delete_operator(global_average_pooling_op);
236   return status;
237 }
238 
xnn_setup_global_average_pooling_nwc_q8(xnn_operator_t global_average_pooling_op,size_t batch_size,size_t width,const uint8_t * input,uint8_t * output,pthreadpool_t threadpool)239 enum xnn_status xnn_setup_global_average_pooling_nwc_q8(
240     xnn_operator_t global_average_pooling_op,
241     size_t batch_size,
242     size_t width,
243     const uint8_t* input,
244     uint8_t* output,
245     pthreadpool_t threadpool)
246 {
247   if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_nwc_q8) {
248     xnn_log_error("failed to setup Global Average Pooling (NWC, Q8) operator: operator type mismatch");
249     return xnn_status_invalid_parameter;
250   }
251   global_average_pooling_op->state = xnn_run_state_invalid;
252 
253   if (!xnn_params.initialized) {
254     xnn_log_error("failed to setup Global Average Pooling operator: XNNPACK is not initialized");
255     return xnn_status_uninitialized;
256   }
257 
258   if (width == 0) {
259     xnn_log_error("failed to setup Global Average Pooling operator with width %zu: width must be non-zero", width);
260     return xnn_status_invalid_parameter;
261   }
262 
263   if (batch_size == 0) {
264     global_average_pooling_op->state = xnn_run_state_skip;
265     return xnn_status_success;
266   }
267 
268   global_average_pooling_op->batch_size = batch_size;
269   global_average_pooling_op->input_width = width;
270   global_average_pooling_op->input = input;
271   global_average_pooling_op->output = output;
272 
273   global_average_pooling_op->q8_avgpool_params =
274     xnn_init_q8_avgpool_params(
275       -(int32_t) width * (int32_t) (uint32_t) global_average_pooling_op->input_zero_point,
276       global_average_pooling_op->input_scale / (global_average_pooling_op->output_scale * (float) width),
277       global_average_pooling_op->output_zero_point,
278       global_average_pooling_op->output_min,
279       global_average_pooling_op->output_max);
280 
281   const size_t input_stride_in_bytes = global_average_pooling_op->input_pixel_stride * sizeof(uint8_t);
282   const size_t channels = global_average_pooling_op->channels;
283   global_average_pooling_op->context.global_average_pooling_nwc = (struct global_average_pooling_nwc_context) {
284       .input = input,
285       .zero = global_average_pooling_op->zero_buffer,
286       .input_pixel_stride = input_stride_in_bytes,
287       .input_batch_stride = input_stride_in_bytes * width,
288       .input_elements = width,
289       .channels = channels,
290       .output = output,
291       .output_batch_stride = global_average_pooling_op->output_pixel_stride * sizeof(uint8_t),
292       .params.q8 = global_average_pooling_op->q8_avgpool_params,
293   };
294   global_average_pooling_op->compute.type = xnn_parallelization_type_1d;
295   global_average_pooling_op->compute.range[0] = batch_size;
296 
297   if (width <= xnn_params.q8.gavgpool.mr) {
298     global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_unipass;
299     global_average_pooling_op->context.global_average_pooling_nwc.unipass_ukernel = xnn_params.q8.gavgpool.up;
300   } else {
301     global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_multipass;
302     global_average_pooling_op->context.global_average_pooling_nwc.multipass_ukernel = xnn_params.q8.gavgpool.mp;
303   }
304   global_average_pooling_op->state = xnn_run_state_ready;
305 
306   return xnn_status_success;
307 }
308 
xnn_setup_global_average_pooling_nwc_f32(xnn_operator_t global_average_pooling_op,size_t batch_size,size_t width,const float * input,float * output,pthreadpool_t threadpool)309 enum xnn_status xnn_setup_global_average_pooling_nwc_f32(
310     xnn_operator_t global_average_pooling_op,
311     size_t batch_size,
312     size_t width,
313     const float* input,
314     float* output,
315     pthreadpool_t threadpool)
316 {
317   if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_nwc_f32) {
318     xnn_log_error("failed to setup Global Average Pooling (NWC, F32) operator: operator type mismatch");
319     return xnn_status_invalid_parameter;
320   }
321   global_average_pooling_op->state = xnn_run_state_invalid;
322 
323   if (!xnn_params.initialized) {
324     xnn_log_error("failed to setup Global Average Pooling operator: XNNPACK is not initialized");
325     return xnn_status_uninitialized;
326   }
327 
328   if (width == 0) {
329     xnn_log_error("failed to setup Global Average Pooling operator with width %zu: width must be non-zero", width);
330     return xnn_status_invalid_parameter;
331   }
332 
333   if (batch_size == 0) {
334     global_average_pooling_op->state = xnn_run_state_skip;
335     return xnn_status_success;
336   }
337 
338   global_average_pooling_op->batch_size = batch_size;
339   global_average_pooling_op->input_width = width;
340   global_average_pooling_op->input = input;
341   global_average_pooling_op->output = output;
342 
343   xnn_update_f32_avgpool_params(&global_average_pooling_op->f32_avgpool_params, 1.0f / (float) width);
344 
345   const size_t input_stride_in_bytes = global_average_pooling_op->input_pixel_stride * sizeof(float);
346   const size_t channels = global_average_pooling_op->channels;
347   global_average_pooling_op->context.global_average_pooling_nwc = (struct global_average_pooling_nwc_context) {
348       .input = input,
349       .zero = global_average_pooling_op->zero_buffer,
350       .input_pixel_stride = input_stride_in_bytes,
351       .input_batch_stride = input_stride_in_bytes * width,
352       .input_elements = width,
353       .channels = channels,
354       .output = output,
355       .output_batch_stride = global_average_pooling_op->output_pixel_stride * sizeof(float),
356       .params.f32 = global_average_pooling_op->f32_avgpool_params,
357   };
358   global_average_pooling_op->compute.type = xnn_parallelization_type_1d;
359   global_average_pooling_op->compute.range[0] = batch_size;
360 
361   if (width <= xnn_params.f32.gavgpool.mr) {
362     global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_unipass;
363     global_average_pooling_op->context.global_average_pooling_nwc.unipass_ukernel = xnn_params.f32.gavgpool.up;
364   } else {
365     global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_multipass;
366     global_average_pooling_op->context.global_average_pooling_nwc.multipass_ukernel = xnn_params.f32.gavgpool.mp;
367   }
368   global_average_pooling_op->state = xnn_run_state_ready;
369 
370   return xnn_status_success;
371 }
372