// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/operator.h>
#include <xnnpack/log.h>
#include <xnnpack/microparams-init.h>


enum xnn_status xnn_create_softmax_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  xnn_operator_t softmax_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_stride, channels);
    goto error;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), input_scale);
    goto error;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_scale);
    goto error;
  }

  status = xnn_status_unsupported_parameter;

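  // Softmax outputs lie in [0, 1), so with the required zero point of 0 and output scale
  // of 2**-8 = 1/256, the uint8 range [0, 255] maps exactly onto [0, 255/256]. The checks
  // below reject any other output quantization.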
  if (output_scale != 0x1.0p-8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale: only output scale of 1/256 is supported",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_scale);
    goto error;
  }

  if (output_zero_point != 0) {
    xnn_log_error(
      "failed to create %s operator with %" PRIu8 " output zero point: only output zero point of 0 is supported",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8), output_zero_point);
    goto error;
  }

  status = xnn_status_out_of_memory;

  softmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (softmax_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

  softmax_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint32_t));
  if (softmax_op->lookup_table == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator lookup table",
      256 * sizeof(uint32_t), xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    goto error;
  }

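  // Precompute a 256-entry table with lookup_table[i] ~= qscale * exp((i - 255) * input_scale),
  // where qscale = min(UINT32_MAX / channels, 2**23 - 1) guarantees that the sum of up to
  // `channels` entries fits in uint32_t. Since table[x + (255 - max)] equals
  // qscale * exp((x - max) * input_scale), the per-row maximum subtraction reduces to a
  // pointer offset at compute time.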
  uint32_t* lookup_table = softmax_op->lookup_table;
  const double qscale = fmin(((double) UINT32_MAX) / (double) channels, 8388607.0);
  for (int32_t i = 0; i < 256; i++) {
    const double scaled_exp_xi = qscale * exp((double) (i - 255) * (double) input_scale);
    lookup_table[(uint32_t) i] = (uint32_t) lrint(scaled_exp_xi);
  }

  softmax_op->channels = channels;
  softmax_op->input_pixel_stride = input_stride;
  softmax_op->output_pixel_stride = output_stride;

  softmax_op->type = xnn_operator_type_softmax_nc_qu8;
  softmax_op->flags = flags;

  softmax_op->state = xnn_run_state_invalid;

  *softmax_op_out = softmax_op;
  return xnn_status_success;

error:
  xnn_delete_operator(softmax_op);
  return status;
}

enum xnn_status xnn_setup_softmax_nc_qu8(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  if (softmax_op->type != xnn_operator_type_softmax_nc_qu8) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8),
      xnn_operator_type_to_string(softmax_op->type));
    return xnn_status_invalid_parameter;
  }
  softmax_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_softmax_nc_qu8));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

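  // Each row is handled in two passes at run time: the rmax microkernel finds the row
  // maximum, and the lut32norm microkernel sums the max-shifted lookup table entries and
  // renormalizes them to the fixed 1/256 output quantization.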
  softmax_op->context.u8_softmax = (struct u8_softmax_context) {
    .n = softmax_op->channels,
    .x = input,
    .x_stride = softmax_op->input_pixel_stride * sizeof(uint8_t),
    .t = softmax_op->lookup_table,
    .y = output,
    .y_stride = softmax_op->output_pixel_stride * sizeof(uint8_t),
    .rmax_ukernel = xnn_params.u8.rmax,
    .lut_norm_ukernel = xnn_params.u8.lut32norm,
  };
  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_u8_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}

static enum xnn_status create_softmax_nc_floating_point(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    uint32_t datatype_init_flags,
    enum xnn_operator_type operator_type,
    xnn_operator_t* softmax_op_out)
{
  xnn_operator_t softmax_op = NULL;
  enum xnn_status status = xnn_status_uninitialized;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_unsupported_hardware;

  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error("failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    goto error;
  }

  status = xnn_status_invalid_parameter;

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    goto error;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_stride, channels);
    goto error;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_stride, channels);
    goto error;
  }

  status = xnn_status_out_of_memory;

  softmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (softmax_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    goto error;
  }

  softmax_op->channels = channels;
  softmax_op->input_pixel_stride = input_stride;
  softmax_op->output_pixel_stride = output_stride;

  softmax_op->type = operator_type;
  softmax_op->flags = flags;

  softmax_op->state = xnn_run_state_invalid;

  *softmax_op_out = softmax_op;
  return xnn_status_success;

error:
  xnn_delete_operator(softmax_op);
  return status;
}

enum xnn_status xnn_create_softmax_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  return create_softmax_nc_floating_point(
    channels, input_stride, output_stride,
    flags,
    XNN_INIT_FLAG_F16,
    xnn_operator_type_softmax_nc_f16,
    softmax_op_out);
}

enum xnn_status xnn_create_softmax_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* softmax_op_out)
{
  return create_softmax_nc_floating_point(
    channels, input_stride, output_stride,
    flags,
    XNN_INIT_FLAG_F32,
    xnn_operator_type_softmax_nc_f32,
    softmax_op_out);
}

static enum xnn_status setup_softmax_nc_floating_point(
    xnn_operator_t softmax_op,
    enum xnn_operator_type expected_operator_type,
    size_t batch_size,
    const void* input,
    void* output,
    uint32_t log2_element_size,
    xnn_rmax_ukernel_function rmax,
    const struct raddstoreexpminusmax_parameters raddstoreexpminusmax[restrict XNN_MIN_ELEMENTS(1)],
    const struct vbinary_parameters vmul[restrict XNN_MIN_ELEMENTS(1)],
    xnn_compute_reciprocal_function compute_reciprocal,
    const void* expminus_params,
    size_t expminus_params_size,
    const void* minmax_params,
    size_t minmax_params_size)
{
  if (softmax_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(softmax_op->type));
    return xnn_status_invalid_parameter;
  }
  softmax_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(expected_operator_type));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    softmax_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  softmax_op->batch_size = batch_size;
  softmax_op->input = input;
  softmax_op->output = output;

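  // Per-row pipeline: rmax finds the row maximum, raddstoreexpminusmax stores
  // exp(x - max) while accumulating its sum, compute_reciprocal inverts that sum, and the
  // vmulc microkernel scales the stored exponentials by the reciprocal. The lengths and
  // strides below are in bytes, hence the log2_element_size shifts.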
  softmax_op->context.floating_point_softmax = (struct floating_point_softmax_context) {
    .n = softmax_op->channels << log2_element_size,
    .x = input,
    .x_stride = softmax_op->input_pixel_stride << log2_element_size,
    .y = output,
    .y_stride = softmax_op->output_pixel_stride << log2_element_size,
    .rmax_ukernel = rmax,
    .raddstoreexpminusmax_ukernel = raddstoreexpminusmax->ukernel,
    .compute_reciprocal = compute_reciprocal,
    .vmulc_ukernel = vmul->minmax.opc_ukernel,
  };
  // Prefer the unclamped (linear) multiply microkernel when one is available.
  if (vmul->linear.opc_ukernel != NULL) {
    softmax_op->context.floating_point_softmax.vmulc_ukernel = vmul->linear.opc_ukernel;
  }
  memcpy(&softmax_op->context.floating_point_softmax.expminus_params, expminus_params, expminus_params_size);
  memcpy(&softmax_op->context.floating_point_softmax.minmax_params, minmax_params, minmax_params_size);
  softmax_op->compute.type = xnn_parallelization_type_1d;
  softmax_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_floating_point_softmax;
  softmax_op->compute.range[0] = batch_size;
  softmax_op->state = xnn_run_state_ready;

  return xnn_status_success;
}

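// Scalar reciprocal used to normalize the f16 softmax sum: widen to fp32, divide, and
// round back to half precision via the fp16 library's conversions.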
static void compute_reciprocal_f16(
    const uint16_t input[XNN_MIN_ELEMENTS(1)],
    uint16_t output[XNN_MIN_ELEMENTS(1)])
{
  *output = fp16_ieee_from_fp32_value(1.0f / fp16_ieee_to_fp32_value(*input));
}

enum xnn_status xnn_setup_softmax_nc_f16(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  union xnn_f16_expminus_params expminus_params;
  if (xnn_params.f16.raddstoreexpminusmax.init.f16 != NULL) {
    xnn_params.f16.raddstoreexpminusmax.init.f16(&expminus_params);
  }
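  // UINT16_C(0xFC00) and UINT16_C(0x7C00) are the IEEE half-precision bit patterns for
  // -infinity and +infinity, so the multiply's output clamp is effectively disabled.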
  union xnn_f16_minmax_params minmax_params;
  if (xnn_params.f16.vmul.init.f16_minmax != NULL) {
    xnn_params.f16.vmul.init.f16_minmax(&minmax_params, UINT16_C(0xFC00), UINT16_C(0x7C00));
  }
  return setup_softmax_nc_floating_point(
    softmax_op, xnn_operator_type_softmax_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    xnn_params.f16.rmax, &xnn_params.f16.raddstoreexpminusmax, &xnn_params.f16.vmul,
    (xnn_compute_reciprocal_function) compute_reciprocal_f16,
    &expminus_params, sizeof(expminus_params),
    &minmax_params, sizeof(minmax_params));
}

static void compute_reciprocal_f32(
    const float input[XNN_MIN_ELEMENTS(1)],
    float output[XNN_MIN_ELEMENTS(1)])
{
  *output = 1.0f / *input;
}

enum xnn_status xnn_setup_softmax_nc_f32(
    xnn_operator_t softmax_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  union xnn_f32_expminus_params expminus_params;
  if (xnn_params.f32.raddstoreexpminusmax.init.f32 != NULL) {
    xnn_params.f32.raddstoreexpminusmax.init.f32(&expminus_params);
  }
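  // Clamping to [-INFINITY, INFINITY] likewise disables the multiply's output bounds,
  // mirroring the f16 path above.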
  union xnn_f32_minmax_params minmax_params;
  if (xnn_params.f32.vmul.init.f32_minmax != NULL) {
    xnn_params.f32.vmul.init.f32_minmax(&minmax_params, -INFINITY, INFINITY);
  }
  return setup_softmax_nc_floating_point(
    softmax_op, xnn_operator_type_softmax_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    xnn_params.f32.rmax, &xnn_params.f32.raddstoreexpminusmax, &xnn_params.f32.vmul,
    (xnn_compute_reciprocal_function) compute_reciprocal_f32,
    &expminus_params, sizeof(expminus_params),
    &minmax_params, sizeof(minmax_params));
}
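
// A minimal usage sketch (illustrative only, not part of this file). It assumes the
// public API declared in <xnnpack.h>; `channels`, `batch`, `input`, and `output` are
// hypothetical caller-provided values:
//
//   xnn_initialize(NULL /* allocator */);
//   xnn_operator_t op = NULL;
//   xnn_create_softmax_nc_f32(channels, channels, channels, 0 /* flags */, &op);
//   xnn_setup_softmax_nc_f32(op, batch, input, output, NULL /* threadpool */);
//   xnn_run_operator(op, NULL /* threadpool */);
//   xnn_delete_operator(op);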