// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/operator.h>
#include <xnnpack/log.h>
#include <xnnpack/microparams-init.h>


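// Creates a QU8 softmax operator. The output quantization is fixed: softmax outputs
// lie in [0, 1], and with the required output scale of 1/256 and zero point of 0 a
// quantized value q encodes exactly q/256.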
enum xnn_status xnn_create_softmax_nc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint32_t flags,
  xnn_operator_t* softmax_op_out)
{
  xnn_operator_t softmax_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_stride, channels);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_scale);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

  if (output_scale != 0x1.0p-8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: only output scale of 1/256 is supported",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_scale);
    goto error;
  }

  if (output_zero_point != 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu8 " output zero point: only output zero point of 0 is supported",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_zero_point);
    goto error;
  }

  status = xnn_status_out_of_memory;

  softmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (softmax_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  softmax_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint32_t));
  if (softmax_op->lookup_table == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator lookup table",
      256 * sizeof(uint32_t), xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

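  // Populate a 256-entry lookup table with lookup_table[i] = round(qscale * exp((i - 255) * input_scale)).
  // Since exp((i - 255) * input_scale) <= 1, capping qscale at UINT32_MAX / channels guarantees that a
  // per-row sum over channels entries cannot overflow uint32_t; the additional 2**23 - 1 ceiling
  // presumably leaves headroom for the fixed-point arithmetic in the lut32norm microkernel.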
  uint32_t* lookup_table = softmax_op->lookup_table;
  const double qscale = fmin(((double) UINT32_MAX) / (double) channels, 8388607.0);
  for (int32_t i = 0; i < 256; i++) {
    const double scaled_exp_xi = qscale * exp((double) (i - 255) * (double) input_scale);
    lookup_table[(uint32_t) i] = (uint32_t) lrint(scaled_exp_xi);
  }

  softmax_op->channels = channels;
  softmax_op->input_pixel_stride = input_stride;
  softmax_op->output_pixel_stride = output_stride;

  softmax_op->type = xnn_operator_type_softmax_nc_qu8;
  softmax_op->flags = flags;

  softmax_op->state = xnn_run_state_invalid;

  *softmax_op_out = softmax_op;
  return xnn_status_success;

error:
  xnn_delete_operator(softmax_op);
  return status;
}

enum xnn_status xnn_setup_softmax_nc_qu8(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool)
{
  if (softmax_op->type != xnn_operator_type_softmax_nc_qu8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8),
      xnn_operator_type_to_string(softmax_op->type));
    return xnn_status_invalid_parameter;
  }
  softmax_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

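  // Per batch element, xnn_compute_u8_softmax (defined elsewhere) runs two microkernels:
  // rmax finds the row maximum, then lut32norm reads the lookup table at an offset of
  // 255 - max, so each input x maps to qscale * exp((x - max) * input_scale), and
  // normalizes the sum to produce outputs with the fixed 1/256 scale.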
  softmax_op->context.u8_softmax = (struct u8_softmax_context) {
    .n = softmax_op->channels,
    .x = input,
    .x_stride = softmax_op->input_pixel_stride * sizeof(uint8_t),
    .t = softmax_op->lookup_table,
    .y = output,
    .y_stride = softmax_op->output_pixel_stride * sizeof(uint8_t),
    .rmax_ukernel = xnn_params.u8.rmax,
    .lut_norm_ukernel = xnn_params.u8.lut32norm,
  };
  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_u8_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}

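// Shared creation path for the F16 and F32 softmax operators; datatype_init_flags lets
// each caller require that microkernels for its element type were successfully initialized.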
static enum xnn_status create_softmax_nc_floating_point(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  uint32_t datatype_init_flags,
  enum xnn_operator_type operator_type,
  xnn_operator_t* softmax_op_out)
{
  xnn_operator_t softmax_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_unsupported_hardware;

  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error("failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_stride, channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  softmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (softmax_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  softmax_op->channels = channels;
  softmax_op->input_pixel_stride = input_stride;
  softmax_op->output_pixel_stride = output_stride;

  softmax_op->type = operator_type;
  softmax_op->flags = flags;

  softmax_op->state = xnn_run_state_invalid;

  *softmax_op_out = softmax_op;
  return xnn_status_success;

error:
  xnn_delete_operator(softmax_op);
  return status;
}

enum xnn_status xnn_create_softmax_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out)
{
  return create_softmax_nc_floating_point(
    channels, input_stride, output_stride,
    flags,
    XNN_INIT_FLAG_F16,
    xnn_operator_type_softmax_nc_f16,
    softmax_op_out);
}

enum xnn_status xnn_create_softmax_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  uint32_t flags,
  xnn_operator_t* softmax_op_out)
{
  return create_softmax_nc_floating_point(
    channels, input_stride, output_stride,
    flags,
    XNN_INIT_FLAG_F32,
    xnn_operator_type_softmax_nc_f32,
    softmax_op_out);
}

static enum xnn_status setup_softmax_nc_floating_point(
  xnn_operator_t softmax_op,
  enum xnn_operator_type expected_operator_type,
  size_t batch_size,
  const void* input,
  void* output,
  uint32_t log2_element_size,
  xnn_rmax_ukernel_function rmax,
  const struct raddstoreexpminusmax_parameters raddstoreexpminusmax[restrict XNN_MIN_ELEMENTS(1)],
  const struct vbinary_parameters vmul[restrict XNN_MIN_ELEMENTS(1)],
  xnn_compute_reciprocal_function compute_reciprocal,
  const void* expminus_params,
  size_t expminus_params_size,
  const void* minmax_params,
  size_t minmax_params_size)
{
  if (softmax_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(softmax_op->type));
    return xnn_status_invalid_parameter;
  }
  softmax_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(expected_operator_type));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

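  // Per batch element, xnn_compute_floating_point_softmax (defined elsewhere) runs three
  // passes: rmax finds the row maximum, raddstoreexpminusmax stores exp(x - max) while
  // accumulating its sum, and vmulc multiplies the stored values by the reciprocal of
  // that sum.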
  softmax_op->context.floating_point_softmax = (struct floating_point_softmax_context) {
    .n = softmax_op->channels << log2_element_size,
    .x = input,
    .x_stride = softmax_op->input_pixel_stride << log2_element_size,
    .y = output,
    .y_stride = softmax_op->output_pixel_stride << log2_element_size,
    .rmax_ukernel = rmax,
    .raddstoreexpminusmax_ukernel = raddstoreexpminusmax->ukernel,
    .compute_reciprocal = compute_reciprocal,
    .vmulc_ukernel = vmul->minmax.opc_ukernel,
  };
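  // Prefer the linear (non-clamping) variant of the multiply-by-scalar microkernel when
  // one is available; the minmax variant selected above remains the fallback.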
  if (vmul->linear.opc_ukernel != NULL) {
    softmax_op->context.floating_point_softmax.vmulc_ukernel = vmul->linear.opc_ukernel;
  }
  memcpy(&softmax_op->context.floating_point_softmax.expminus_params, expminus_params, expminus_params_size);
  memcpy(&softmax_op->context.floating_point_softmax.minmax_params, minmax_params, minmax_params_size);
  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_floating_point_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}

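// Scalar reciprocal of the per-row sum of exponentials, computed once per batch element.
// F16 values are widened to F32 for the division and narrowed back, as there is no scalar
// half-precision divide here.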
static void compute_reciprocal_f16(
  const uint16_t input[XNN_MIN_ELEMENTS(1)],
  uint16_t output[XNN_MIN_ELEMENTS(1)])
{
  *output = fp16_ieee_from_fp32_value(1.0f / fp16_ieee_to_fp32_value(*input));
}

enum xnn_status xnn_setup_softmax_nc_f16(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  union xnn_f16_expminus_params expminus_params;
  if (xnn_params.f16.raddstoreexpminusmax.init.f16 != NULL) {
    xnn_params.f16.raddstoreexpminusmax.init.f16(&expminus_params);
  }
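  // 0xFC00 and 0x7C00 are -infinity and +infinity in IEEE half precision, so the multiply
  // microkernel is configured with no effective output clamping.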
  union xnn_f16_minmax_params minmax_params;
  if (xnn_params.f16.vmul.init.f16_minmax != NULL) {
    xnn_params.f16.vmul.init.f16_minmax(&minmax_params, UINT16_C(0xFC00), UINT16_C(0x7C00));
  }
  return setup_softmax_nc_floating_point(
    softmax_op, xnn_operator_type_softmax_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    xnn_params.f16.rmax, &xnn_params.f16.raddstoreexpminusmax, &xnn_params.f16.vmul,
    (xnn_compute_reciprocal_function) compute_reciprocal_f16,
    &expminus_params, sizeof(expminus_params),
    &minmax_params, sizeof(minmax_params));
}

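// Scalar reciprocal of the per-row sum of exponentials, computed once per batch element.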
static void compute_reciprocal_f32(
  const float input[XNN_MIN_ELEMENTS(1)],
  float output[XNN_MIN_ELEMENTS(1)])
{
  *output = 1.0f / *input;
}

enum xnn_status xnn_setup_softmax_nc_f32(
  xnn_operator_t softmax_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool)
{
  union xnn_f32_expminus_params expminus_params;
  if (xnn_params.f32.raddstoreexpminusmax.init.f32 != NULL) {
    xnn_params.f32.raddstoreexpminusmax.init.f32(&expminus_params);
  }
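  // The -INFINITY/+INFINITY bounds configure the multiply microkernel with no effective
  // output clamping.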
  union xnn_f32_minmax_params minmax_params;
  if (xnn_params.f32.vmul.init.f32_minmax != NULL) {
    xnn_params.f32.vmul.init.f32_minmax(&minmax_params, -INFINITY, INFINITY);
  }
  return setup_softmax_nc_floating_point(
    softmax_op, xnn_operator_type_softmax_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    xnn_params.f32.rmax, &xnn_params.f32.raddstoreexpminusmax, &xnn_params.f32.vmul,
    (xnn_compute_reciprocal_function) compute_reciprocal_f32,
    &expminus_params, sizeof(expminus_params),
    &minmax_params, sizeof(minmax_params));
}