1 // Copyright (c) Facebook, Inc. and its affiliates.
2 // All rights reserved.
3 //
4 // Copyright 2019 Google LLC
5 //
6 // This source code is licensed under the BSD-style license found in the
7 // LICENSE file in the root directory of this source tree.
8
9 #include <assert.h>
10 #include <math.h>
11 #include <stddef.h>
12 #include <stdint.h>
13 #include <stdlib.h>
14
15 #include <xnnpack.h>
16 #include <xnnpack/allocator.h>
17 #include <xnnpack/operator.h>
18 #include <xnnpack/log.h>
19
20
// Creates a Sigmoid operator for 8-bit quantized data, implemented as a
// 256-entry lookup table that is computed here.
//
// Parameters:
//   channels           - number of channels; must be non-zero.
//   input_stride       - input element stride; must be >= channels.
//   output_stride      - output element stride; must be >= channels.
//   input_zero_point   - quantized input value that represents 0.0.
//   input_scale        - input quantization scale; must be positive and normal.
//   output_zero_point  - only 0 is supported.
//   output_scale       - must be positive and normal; only 1/256 is supported.
//   output_min/max     - clamping range for table entries; min must be < max.
//   flags              - not read by this function.
//   sigmoid_op_out     - receives the new operator on success only.
//
// Returns:
//   xnn_status_success                on success;
//   xnn_status_uninitialized          if XNNPACK is not initialized;
//   xnn_status_invalid_parameter      if a channel/stride/scale/range check fails;
//   xnn_status_unsupported_parameter  if output scale or zero point is unsupported;
//   xnn_status_out_of_memory          if either allocation fails.
//
// On every failure path xnn_delete_operator() is invoked on the (possibly NULL)
// partially constructed operator before returning.
enum xnn_status xnn_create_sigmoid_nc_q8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Sigmoid operator: XNNPACK is not initialized");
    goto error;
  }

  // Argument validation: any failure here is reported as an invalid parameter.
  if (channels == 0) {
    xnn_log_error(
      "failed to create Sigmoid operator with %zu channels: number of channels must be non-zero", channels);
    status = xnn_status_invalid_parameter;
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create Sigmoid operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    status = xnn_status_invalid_parameter;
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create Sigmoid operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    status = xnn_status_invalid_parameter;
    goto error;
  }

  if (!isnormal(input_scale) || input_scale <= 0.0f) {
    xnn_log_error(
      "failed to create Sigmoid operator with %.7g input scale: scale must be finite, normalized, and positive",
      input_scale);
    status = xnn_status_invalid_parameter;
    goto error;
  }

  if (!isnormal(output_scale) || output_scale <= 0.0f) {
    xnn_log_error(
      "failed to create Sigmoid operator with %.7g output scale: scale must be finite, normalized, and positive",
      output_scale);
    status = xnn_status_invalid_parameter;
    goto error;
  }

  if (!(output_min < output_max)) {
    xnn_log_error(
      "failed to create Sigmoid operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      output_min, output_max);
    status = xnn_status_invalid_parameter;
    goto error;
  }

  // Valid but unsupported quantization parameters.
  if (output_scale != 0x1.0p-8f) {
    xnn_log_error(
      "failed to create Sigmoid operator with %.7g output scale: only output scale of 1/256 is supported",
      output_scale);
    status = xnn_status_unsupported_parameter;
    goto error;
  }

  if (output_zero_point != 0) {
    xnn_log_error(
      "failed to create Sigmoid operator with %" PRIu8 " output zero point: only output zero point of 0 is supported",
      output_zero_point);
    status = xnn_status_unsupported_parameter;
    goto error;
  }

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for Sigmoid operator descriptor", sizeof(struct xnn_operator));
    status = xnn_status_out_of_memory;
    goto error;
  }

  op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
  if (op->lookup_table == NULL) {
    xnn_log_error("failed to allocate 256 bytes for Sigmoid lookup table");
    status = xnn_status_out_of_memory;
    goto error;
  }

  // Fill the table: entry `code` holds the quantized sigmoid of the
  // dequantized input value that `code` represents.
  uint8_t* table = op->lookup_table;
  const float lower_bound = (float) (int32_t) output_min;
  const float upper_bound = (float) (int32_t) output_max;
  for (uint32_t code = 0; code < 256; code++) {
    const int32_t centered = (int32_t) code - (int32_t) (uint32_t) input_zero_point;
    const float x = input_scale * (float) centered;
    // sigmoid(x) multiplied by 1 / output scale = 256.0
    const float scaled = 256.0f / (1.0f + expf(-x));
    // Clamp into [output_min, output_max] before rounding to an integer.
    const float floored = scaled < lower_bound ? lower_bound : scaled;
    const float clamped = floored > upper_bound ? upper_bound : floored;
    table[code] = (uint8_t) lrintf(clamped);
  }

  op->type = xnn_operator_type_sigmoid_nc_q8;
  op->ukernel.type = xnn_ukernel_type_lut;

  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;

  // The operator cannot run until a setup call succeeds.
  op->state = xnn_run_state_invalid;

  *sigmoid_op_out = op;
  return xnn_status_success;

error:
  xnn_delete_operator(op);
  return status;
}
149
// Creates a Sigmoid operator for single-precision floating-point data.
//
// Parameters:
//   channels        - number of channels; must be non-zero.
//   input_stride    - input element stride; must be >= channels.
//   output_stride   - output element stride; must be >= channels.
//   flags           - not read by this function.
//   sigmoid_op_out  - receives the new operator on success only.
//
// Returns:
//   xnn_status_success               on success;
//   xnn_status_uninitialized         if XNNPACK is not initialized;
//   xnn_status_invalid_parameter     if a channel/stride check fails;
//   xnn_status_unsupported_hardware  if xnn_params.f32.sigmoid is NULL;
//   xnn_status_out_of_memory         if the operator allocation fails.
//
// On every failure path xnn_delete_operator() is invoked on the (possibly NULL)
// operator pointer before returning.
enum xnn_status xnn_create_sigmoid_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  xnn_operator_t op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to create Sigmoid operator: XNNPACK is not initialized");
    goto error;
  }

  // Argument validation: any failure here is reported as an invalid parameter.
  if (channels == 0) {
    xnn_log_error(
      "failed to create Sigmoid operator with %zu channels: number of channels must be non-zero", channels);
    status = xnn_status_invalid_parameter;
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create Sigmoid operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      input_stride, channels);
    status = xnn_status_invalid_parameter;
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create Sigmoid operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      output_stride, channels);
    status = xnn_status_invalid_parameter;
    goto error;
  }

  // A missing F32 sigmoid micro-kernel means this configuration is unsupported.
  if (xnn_params.f32.sigmoid == NULL) {
    xnn_log_error(
      "failed to create Sigmoid operator: "
      "only selected hardware configurations are supported");
    status = xnn_status_unsupported_hardware;
    goto error;
  }

  op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (op == NULL) {
    xnn_log_error("failed to allocate %zu bytes for xnn_operator structure", sizeof(struct xnn_operator));
    status = xnn_status_out_of_memory;
    goto error;
  }

  op->type = xnn_operator_type_sigmoid_nc_f32;
  op->ukernel.type = xnn_ukernel_type_sigmoid;

  op->channels = channels;
  op->input_pixel_stride = input_stride;
  op->output_pixel_stride = output_stride;

  // The operator cannot run until a setup call succeeds.
  op->state = xnn_run_state_invalid;

  *sigmoid_op_out = op;
  return xnn_status_success;

error:
  xnn_delete_operator(op);
  return status;
}
222
// Binds input/output buffers and a batch size to a Q8 Sigmoid operator and
// fills in the compute descriptor used to run it.
//
// Parameters:
//   sigmoid_op  - operator whose type must be xnn_operator_type_sigmoid_nc_q8.
//   batch_size  - number of batch elements; 0 marks the operator as skippable.
//   input       - input buffer.
//   output      - output buffer.
//   threadpool  - not read by this function.
//
// Returns:
//   xnn_status_success            on success (including batch_size == 0);
//   xnn_status_invalid_parameter  if the operator type does not match;
//   xnn_status_uninitialized      if XNNPACK is not initialized.
enum xnn_status xnn_setup_sigmoid_nc_q8(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (sigmoid_op->type != xnn_operator_type_sigmoid_nc_q8) {
    xnn_log_error("failed to setup Sigmoid (Q8) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }

  // Stay invalid until the setup below has fully completed.
  sigmoid_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Sigmoid operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    sigmoid_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  sigmoid_op->batch_size = batch_size;
  sigmoid_op->input = input;
  sigmoid_op->output = output;

  const size_t channels = sigmoid_op->channels;
  const size_t input_stride = sigmoid_op->input_pixel_stride;
  const size_t output_stride = sigmoid_op->output_pixel_stride;

  // The contiguous path is used when both strides equal the channel count or
  // when there is only a single batch element.
  const int strides_match_channels = (input_stride == channels) && (output_stride == channels);
  const int use_contiguous = strides_match_channels || (batch_size == 1);

  if (!use_contiguous) {
    // Strided path: one task per batch element.
    sigmoid_op->context.lut_strided = (struct lut_strided_context) {
      .n = channels,
      .x = input,
      .x_stride = input_stride * sizeof(uint8_t),
      .t = sigmoid_op->lookup_table,
      .y = output,
      .y_stride = output_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d;
    sigmoid_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_lut_strided;
    sigmoid_op->compute.range[0] = batch_size;
    sigmoid_op->compute.tile[0] = 0;
  } else {
    // Contiguous path: the whole byte range is split into fixed-size tiles.
    const size_t tile_bytes = 1024;
    sigmoid_op->context.lut_contiguous = (struct lut_contiguous_context) {
      .x = input,
      .x_stride = input_stride * sizeof(uint8_t),
      .t = sigmoid_op->lookup_table,
      .y = output,
      .y_stride = output_stride * sizeof(uint8_t),
      .ukernel = xnn_params.x8.lut,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    sigmoid_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_lut_contiguous;
    sigmoid_op->compute.range[0] = batch_size * channels * sizeof(uint8_t);
    sigmoid_op->compute.tile[0] = tile_bytes;
  }

  sigmoid_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
286
// Binds input/output buffers and a batch size to an F32 Sigmoid operator and
// fills in the compute descriptor used to run it.
//
// Parameters:
//   sigmoid_op  - operator whose type must be xnn_operator_type_sigmoid_nc_f32.
//   batch_size  - number of batch elements; 0 marks the operator as skippable.
//   input       - input buffer.
//   output      - output buffer.
//   threadpool  - not read by this function.
//
// Returns:
//   xnn_status_success            on success (including batch_size == 0);
//   xnn_status_invalid_parameter  if the operator type does not match;
//   xnn_status_uninitialized      if XNNPACK is not initialized.
enum xnn_status xnn_setup_sigmoid_nc_f32(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  if (sigmoid_op->type != xnn_operator_type_sigmoid_nc_f32) {
    xnn_log_error("failed to setup Sigmoid (F32) operator: operator type mismatch");
    return xnn_status_invalid_parameter;
  }

  // Stay invalid until the setup below has fully completed.
  sigmoid_op->state = xnn_run_state_invalid;

  if (!xnn_params.initialized) {
    xnn_log_error("failed to setup Sigmoid operator: XNNPACK is not initialized");
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    sigmoid_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t channels = sigmoid_op->channels;
  const size_t input_stride = sigmoid_op->input_pixel_stride;
  const size_t output_stride = sigmoid_op->output_pixel_stride;

  // The contiguous path is used when both strides equal the channel count or
  // when there is only a single batch element.
  const int strides_match_channels = (input_stride == channels) && (output_stride == channels);
  const int use_contiguous = strides_match_channels || (batch_size == 1);

  if (!use_contiguous) {
    // Strided path: the batch range is tiled one element at a time.
    sigmoid_op->context.univector_strided = (struct univector_strided_context) {
      .n = channels * sizeof(float),
      .x = input,
      .x_stride = input_stride * sizeof(float),
      .y = output,
      .y_stride = output_stride * sizeof(float),
      .ukernel = xnn_params.f32.sigmoid,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    sigmoid_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_strided;
    sigmoid_op->compute.range[0] = batch_size;
    sigmoid_op->compute.tile[0] = 1;
  } else {
    // Contiguous path: the whole byte range is split into fixed-size tiles.
    const size_t tile_bytes = 4096;
    sigmoid_op->context.univector_contiguous = (struct univector_contiguous_context) {
      .x = input,
      .x_stride = input_stride * sizeof(float),
      .y = output,
      .y_stride = output_stride * sizeof(float),
      .ukernel = xnn_params.f32.sigmoid,
    };
    sigmoid_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    sigmoid_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_contiguous;
    sigmoid_op->compute.range[0] = batch_size * channels * sizeof(float);
    sigmoid_op->compute.tile[0] = tile_bytes;
  }

  sigmoid_op->state = xnn_run_state_ready;
  return xnn_status_success;
}
344