• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8 
9 #include <assert.h>
10 #include <math.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14 
15 #include <xnnpack.h>
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/log.h>
19 
20 
// Populates the 256-entry table that maps every possible 8-bit input code to
// its 8-bit sigmoid output.
//
// Each entry is sigmoid(x) multiplied by 256 (the reciprocal of the only
// accepted output scale, 1/256), clamped to [output_min, output_max] and then
// rounded with lrintf.
static void fill_sigmoid_q8_lookup_table(
    uint8_t* table,
    float input_scale,
    uint8_t input_zero_point,
    uint8_t output_min,
    uint8_t output_max)
{
  const float lower_bound = (float) (int32_t) output_min;
  const float upper_bound = (float) (int32_t) output_max;
  const int32_t zero_point = (int32_t) (uint32_t) input_zero_point;
  for (int32_t code = 0; code < 256; code++) {
    // De-quantize the input code before evaluating the sigmoid.
    const float x = input_scale * (float) (code - zero_point);
    // Scale sigmoid(x) by 1 / output scale = 256.0
    float scaled = 256.0f / (1.0f + expf(-x));
    scaled = scaled < lower_bound ? lower_bound : scaled;
    scaled = scaled > upper_bound ? upper_bound : scaled;
    table[(uint32_t) code] = (uint8_t) lrintf(scaled);
  }
}

// Creates a Sigmoid operator for 8-bit quantized data, implemented through a
// 256-entry lookup table computed here.
//
// Parameters:
//   channels          - number of channels; must be non-zero.
//   input_stride      - input element stride; must be >= channels.
//   output_stride     - output element stride; must be >= channels.
//   input_zero_point  - zero point used to de-quantize input codes.
//   input_scale       - input scale; must be positive and normal.
//   output_zero_point - only 0 is supported.
//   output_scale      - must be positive and normal; only 1/256 is supported.
//   output_min/max    - clamping range for table entries; min must be < max.
//   flags             - accepted but not consulted by this function.
//   sigmoid_op_out    - receives the new operator on success only.
//
// Returns xnn_status_success, or one of xnn_status_uninitialized,
// xnn_status_invalid_parameter, xnn_status_unsupported_parameter or
// xnn_status_out_of_memory. Every failure passes the (possibly NULL) operator
// to xnn_delete_operator before returning.
enum xnn_status xnn_create_sigmoid_nc_q8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  xnn_operator_t op = NULL;
  // Code reported by the shared failure path; refined by each failing check.
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Sigmoid operator: XNNPACK is not initialized");
    goto fail;
  }

  // --- Parameter validation -------------------------------------------------

  if (channels == 0) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with %zu channels: number of channels must be non-zero", channels);
    goto fail;
  }

  if (input_stride < channels) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    goto fail;
  }

  if (output_stride < channels) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    goto fail;
  }

  if (!(input_scale > 0.0f && isnormal(input_scale))) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with %.7g input scale: scale must be finite, normalized, and positive",
      input_scale);
    goto fail;
  }

  if (!(output_scale > 0.0f && isnormal(output_scale))) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with %.7g output scale: scale must be finite, normalized, and positive",
      output_scale);
    goto fail;
  }

  if (!(output_min < output_max)) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      output_min, output_max);
    goto fail;
  }

  // --- Supported-configuration checks ---------------------------------------

  if (output_scale != 0x1.0p-8f) {
    status = xnn_status_unsupported_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with %.7g output scale: only output scale of 1/256 is supported",
      output_scale);
    goto fail;
  }

  if (output_zero_point != 0) {
    status = xnn_status_unsupported_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with %" PRIu8 " output zero point: only output zero point of 0 is supported",
      output_zero_point);
    goto fail;
  }

  // --- Allocation -------------------------------------------------------------

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    status = xnn_status_out_of_memory;
    xnn_log_error("failed to allocate %zu bytes for Sigmoid operator descriptor", sizeof(struct xnn_operator));
    goto fail;
  }

  op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
  if (op->lookup_table == NULL) {
    status = xnn_status_out_of_memory;
    xnn_log_error("failed to allocate 256 bytes for Sigmoid lookup table");
    goto fail;
  }

  fill_sigmoid_q8_lookup_table(
    op->lookup_table, input_scale, input_zero_point, output_min, output_max);

  // --- Operator description ---------------------------------------------------

  op->type = xnn_operator_type_sigmoid_nc_q8;
  op->ukernel.type = xnn_ukernel_type_lut;

  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;

  // The operator stays in the invalid run state until a setup call succeeds.
  op->state = xnn_run_state_invalid;

  *sigmoid_op_out = op;
  return xnn_status_success;

fail:
  xnn_delete_operator(op);
  return status;
}
149 
// Creates a Sigmoid operator for single-precision floating-point data.
//
// Parameters:
//   channels       - number of channels; must be non-zero.
//   input_stride   - input element stride; must be >= channels.
//   output_stride  - output element stride; must be >= channels.
//   flags          - accepted but not consulted by this function.
//   sigmoid_op_out - receives the new operator on success only.
//
// Returns xnn_status_success, or one of xnn_status_uninitialized,
// xnn_status_invalid_parameter, xnn_status_unsupported_hardware (no f32
// sigmoid micro-kernel is registered in xnn_params) or
// xnn_status_out_of_memory. Every failure passes the (possibly NULL) operator
// to xnn_delete_operator before returning.
enum xnn_status xnn_create_sigmoid_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  xnn_operator_t op = NULL;
  // Code reported by the shared failure path; refined by each failing check.
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Sigmoid operator: XNNPACK is not initialized");
    goto fail;
  }

  if (channels == 0) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with %zu channels: number of channels must be non-zero", channels);
    goto fail;
  }

  if (input_stride < channels) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    goto fail;
  }

  if (output_stride < channels) {
    status = xnn_status_invalid_parameter;
    xnn_log_error(
      "failed to create Sigmoid operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    goto fail;
  }

  // Without a registered f32 sigmoid micro-kernel there is nothing to run.
  if (xnn_params.f32.sigmoid == NULL) {
    status = xnn_status_unsupported_hardware;
    xnn_log_error(
      "failed to create Sigmoid operator: "
      "only selected hardware configurations are supported");
    goto fail;
  }

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    status = xnn_status_out_of_memory;
    xnn_log_error("failed to allocate %zu bytes for xnn_operator structure", sizeof(struct xnn_operator));
    goto fail;
  }

  op->type = xnn_operator_type_sigmoid_nc_f32;
  op->ukernel.type = xnn_ukernel_type_sigmoid;

  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;

  // The operator stays in the invalid run state until a setup call succeeds.
  op->state = xnn_run_state_invalid;

  *sigmoid_op_out = op;
  return xnn_status_success;

fail:
  xnn_delete_operator(op);
  return status;
}
222 
// Binds input/output buffers and a batch size to a previously created Q8
// Sigmoid operator and fills in its compute description.
//
// Parameters:
//   sigmoid_op - operator whose type must be xnn_operator_type_sigmoid_nc_q8.
//   batch_size - number of batch elements; 0 marks the operator as "skip".
//   input      - input buffer, stored in the operator and its context.
//   output     - output buffer, stored in the operator and its context.
//   threadpool - accepted but not consulted by this function.
//
// Returns xnn_status_invalid_parameter on an operator type mismatch,
// xnn_status_uninitialized if XNNPACK is not initialized, and
// xnn_status_success otherwise.
enum xnn_status xnn_setup_sigmoid_nc_q8(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (sigmoid_op->type != xnn_operator_type_sigmoid_nc_q8) {
    xnn_log_error("failed to setup Sigmoid (Q8) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }

  // Invalidate first so that any later failure leaves the operator non-runnable.
  sigmoid_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Sigmoid operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    // Nothing to compute for an empty batch.
    sigmoid_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  sigmoid_op->batch_size = batch_size;
  sigmoid_op->input = input;
  sigmoid_op->output = output;

  const size_t num_channels = sigmoid_op->channels;
  const size_t in_stride = sigmoid_op->input_pixel_stride;
  const size_t out_stride = sigmoid_op->output_pixel_stride;

  if (batch_size != 1 && (in_stride != num_channels || out_stride != num_channels)) {
    // Strided layout with more than one batch element: a 1-D range of
    // batch_size is handed to the strided LUT task.
    sigmoid_op->context.lut_strided = (struct lut_strided_context) {
      .n = num_channels,
      .x = input,
      .x_stride = in_stride * sizeof(uint8_t),
      .t = sigmoid_op->lookup_table,
      .y = output,
      .y_stride = out_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d;
    sigmoid_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_lut_strided;
    sigmoid_op->compute.range[0] = batch_size;
    sigmoid_op->compute.tile[0] = 0;
  } else {
    // Both strides equal the channel count, or there is a single batch
    // element: the data is handled as one range of batch_size * channels
    // bytes, split into tiles of tile_bytes.
    const size_t tile_bytes = 1024;
    sigmoid_op->context.lut_contiguous = (struct lut_contiguous_context) {
      .x = input,
      .x_stride = in_stride * sizeof(uint8_t),
      .t = sigmoid_op->lookup_table,
      .y = output,
      .y_stride = out_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    sigmoid_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_lut_contiguous;
    sigmoid_op->compute.range[0] = batch_size * num_channels * sizeof(uint8_t);
    sigmoid_op->compute.tile[0] = tile_bytes;
  }

  sigmoid_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
286 
// Binds input/output buffers and a batch size to a previously created F32
// Sigmoid operator and fills in its compute description.
//
// Parameters:
//   sigmoid_op - operator whose type must be xnn_operator_type_sigmoid_nc_f32.
//   batch_size - number of batch elements; 0 marks the operator as "skip".
//   input      - input buffer, stored in the compute context.
//   output     - output buffer, stored in the compute context.
//   threadpool - accepted but not consulted by this function.
//
// Returns xnn_status_invalid_parameter on an operator type mismatch,
// xnn_status_uninitialized if XNNPACK is not initialized, and
// xnn_status_success otherwise.
enum xnn_status xnn_setup_sigmoid_nc_f32(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (sigmoid_op->type != xnn_operator_type_sigmoid_nc_f32) {
    xnn_log_error("failed to setup Sigmoid (F32) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }

  // Invalidate first so that any later failure leaves the operator non-runnable.
  sigmoid_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Sigmoid operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    // Nothing to compute for an empty batch.
    sigmoid_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t num_channels = sigmoid_op->channels;
  const size_t in_stride = sigmoid_op->input_pixel_stride;
  const size_t out_stride = sigmoid_op->output_pixel_stride;

  if (batch_size != 1 && (in_stride != num_channels || out_stride != num_channels)) {
    // Strided layout with more than one batch element: a range of batch_size
    // with a tile of 1 is handed to the strided univector task.
    sigmoid_op->context.univector_strided = (struct univector_strided_context) {
      .n = num_channels * sizeof(float),
      .x = input,
      .x_stride = in_stride * sizeof(float),
      .y = output,
      .y_stride = out_stride * sizeof(float),
      .ukernel = xnn_params.f32.sigmoid,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    sigmoid_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_strided;
    sigmoid_op->compute.range[0] = batch_size;
    sigmoid_op->compute.tile[0] = 1;
  } else {
    // Both strides equal the channel count, or there is a single batch
    // element: the data is handled as one range of
    // batch_size * channels * sizeof(float) bytes, split into tiles of
    // tile_bytes.
    const size_t tile_bytes = 4096;
    sigmoid_op->context.univector_contiguous = (struct univector_contiguous_context) {
      .x = input,
      .x_stride = in_stride * sizeof(float),
      .y = output,
      .y_stride = out_stride * sizeof(float),
      .ukernel = xnn_params.f32.sigmoid,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    sigmoid_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_contiguous;
    sigmoid_op->compute.range[0] = batch_size * num_channels * sizeof(float);
    sigmoid_op->compute.tile[0] = tile_bytes;
  }

  sigmoid_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
344