// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
5 
#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/log.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/operator.h>
#include <xnnpack/params.h>
21 
create_unary_elementwise_nc(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,const void * params,size_t params_size,uint32_t datatype_init_flags,enum xnn_operator_type operator_type,xnn_vunary_ukernel_function ukernel,xnn_operator_t * unary_elementwise_op_out)22 static enum xnn_status create_unary_elementwise_nc(
23     size_t channels,
24     size_t input_stride,
25     size_t output_stride,
26     uint32_t flags,
27     const void* params,
28     size_t params_size,
29     uint32_t datatype_init_flags,
30     enum xnn_operator_type operator_type,
31     xnn_vunary_ukernel_function ukernel,
32     xnn_operator_t* unary_elementwise_op_out)
33 {
34   xnn_operator_t unary_elementwise_op = NULL;
35 
36   if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
37     xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
38       xnn_operator_type_to_string(operator_type));
39     return xnn_status_uninitialized;
40   }
41 
42   if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
43     xnn_log_error("failed to create %s operator: operations on data type are not supported",
44       xnn_operator_type_to_string(operator_type));
45     return xnn_status_unsupported_hardware;
46   }
47 
48   if (channels == 0) {
49     xnn_log_error(
50       "failed to create %s operator with %zu channels: number of channels must be non-zero",
51       xnn_operator_type_to_string(operator_type), channels);
52     return xnn_status_invalid_parameter;
53   }
54 
55   if (input_stride < channels) {
56     xnn_log_error(
57       "failed to create %s operator with input element stride of %zu: "
58       "stride must be at least as large as the number of channels (%zu)",
59       xnn_operator_type_to_string(operator_type), input_stride, channels);
60     return xnn_status_invalid_parameter;
61   }
62 
63   if (output_stride < channels) {
64     xnn_log_error(
65       "failed to create %s operator with output element stride of %zu: "
66       "stride must be at least as large as the number of channels (%zu)",
67       xnn_operator_type_to_string(operator_type), output_stride, channels);
68     return xnn_status_invalid_parameter;
69   }
70 
71   unary_elementwise_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
72   if (unary_elementwise_op == NULL) {
73     xnn_log_error(
74       "failed to allocate %zu bytes for %s operator descriptor",
75       sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
76     return xnn_status_out_of_memory;
77   }
78 
79   unary_elementwise_op->channels = channels;
80   unary_elementwise_op->input_pixel_stride = input_stride;
81   unary_elementwise_op->output_pixel_stride = output_stride;
82   if (params_size != 0) {
83     memcpy(&unary_elementwise_op->params, params, params_size);
84   }
85 
86   unary_elementwise_op->ukernel.vunary.function = ukernel;
87   unary_elementwise_op->type = operator_type;
88   unary_elementwise_op->flags = flags;
89 
90   unary_elementwise_op->state = xnn_run_state_invalid;
91 
92   *unary_elementwise_op_out = unary_elementwise_op;
93   return xnn_status_success;
94 }
95 
setup_unary_elementwise_nc(xnn_operator_t unary_elementwise_op,enum xnn_operator_type expected_operator_type,size_t batch_size,const void * input,void * output,uint32_t log2_input_size,uint32_t log2_output_size,const void * params,size_t params_size,size_t num_threads)96 static enum xnn_status setup_unary_elementwise_nc(
97     xnn_operator_t unary_elementwise_op,
98     enum xnn_operator_type expected_operator_type,
99     size_t batch_size,
100     const void* input,
101     void* output,
102     uint32_t log2_input_size,
103     uint32_t log2_output_size,
104     const void* params,
105     size_t params_size,
106     size_t num_threads)
107 {
108   if (unary_elementwise_op->type != expected_operator_type) {
109     xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
110       xnn_operator_type_to_string(expected_operator_type),
111       xnn_operator_type_to_string(unary_elementwise_op->type));
112     return xnn_status_invalid_parameter;
113   }
114   unary_elementwise_op->state = xnn_run_state_invalid;
115 
116   if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
117     xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
118       xnn_operator_type_to_string(unary_elementwise_op->type));
119     return xnn_status_uninitialized;
120   }
121 
122   if (batch_size == 0) {
123     unary_elementwise_op->state = xnn_run_state_skip;
124     return xnn_status_success;
125   }
126 
127   const size_t channels = unary_elementwise_op->channels;
128   const size_t input_stride = unary_elementwise_op->input_pixel_stride;
129   const size_t output_stride = unary_elementwise_op->output_pixel_stride;
130 
131   xnn_vunary_ukernel_function ukernel = unary_elementwise_op->ukernel.vunary.function;
132 
133   if ((((input_stride ^ channels) | (output_stride ^ channels)) == 0) || batch_size == 1) {
134     const size_t block_size = 4096;
135     unary_elementwise_op->context.univector_contiguous = (struct univector_contiguous_context) {
136       .x = input,
137       .y = output,
138       .log2_xsize = log2_input_size,
139       .log2_ysize = log2_output_size,
140       .ukernel = ukernel,
141     };
142     if (params_size != 0) {
143       memcpy(&unary_elementwise_op->context.univector_contiguous.params, params, params_size);
144     }
145 
146     const size_t range = (batch_size * channels) << log2_input_size;
147     unary_elementwise_op->compute.type = xnn_parallelization_type_1d_tile_1d;
148     unary_elementwise_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_contiguous;
149     unary_elementwise_op->compute.range[0] = range;
150     unary_elementwise_op->compute.tile[0] = (num_threads == 1) ? range : block_size;
151   } else {
152     unary_elementwise_op->context.univector_strided = (struct univector_strided_context) {
153       .n = channels << log2_input_size,
154       .x = input,
155       .x_stride = input_stride << log2_input_size,
156       .y = output,
157       .y_stride = output_stride << log2_output_size,
158       .ukernel = ukernel,
159     };
160     if (params_size != 0) {
161       memcpy(&unary_elementwise_op->context.univector_strided.params, params, params_size);
162     }
163     unary_elementwise_op->compute.type = xnn_parallelization_type_1d_tile_1d;
164     unary_elementwise_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_strided;
165     unary_elementwise_op->compute.range[0] = batch_size;
166     unary_elementwise_op->compute.tile[0] = (num_threads == 1) ? batch_size : 1;
167   }
168   unary_elementwise_op->state = xnn_run_state_ready;
169 
170   return xnn_status_success;
171 }
172 
xnn_create_clamp_nc_f16(size_t channels,size_t input_stride,size_t output_stride,float output_min,float output_max,uint32_t flags,xnn_operator_t * clamp_op_out)173 enum xnn_status xnn_create_clamp_nc_f16(
174     size_t channels,
175     size_t input_stride,
176     size_t output_stride,
177     float output_min,
178     float output_max,
179     uint32_t flags,
180     xnn_operator_t* clamp_op_out)
181 {
182   if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
183     xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
184       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16));
185     return xnn_status_uninitialized;
186   }
187 
188   if ((xnn_params.init_flags & XNN_INIT_FLAG_F16) != XNN_INIT_FLAG_F16) {
189     xnn_log_error("failed to create %s operator: operations on data type are not supported",
190       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16));
191     return xnn_status_unsupported_hardware;
192   }
193 
194   if (isnan(output_min)) {
195     xnn_log_error(
196       "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
197       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16));
198     return xnn_status_invalid_parameter;
199   }
200 
201   if (isnan(output_max)) {
202     xnn_log_error(
203       "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
204       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16));
205     return xnn_status_invalid_parameter;
206   }
207 
208   const uint16_t output_min_as_half = fp16_ieee_from_fp32_value(output_min);
209   const uint16_t output_max_as_half = fp16_ieee_from_fp32_value(output_max);
210   output_min = fp16_ieee_to_fp32_value(output_min_as_half);
211   output_max = fp16_ieee_to_fp32_value(output_max_as_half);
212   if (output_min >= output_max) {
213     xnn_log_error(
214       "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
215       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16), output_min, output_max);
216     return xnn_status_invalid_parameter;
217   }
218 
219   union xnn_f16_minmax_params params;
220   if (xnn_params.f16.clamp.init.f16_minmax != NULL) {
221     xnn_params.f16.clamp.init.f16_minmax(&params, output_min_as_half, output_max_as_half);
222   }
223   return create_unary_elementwise_nc(
224     channels, input_stride, output_stride, flags,
225     &params, sizeof(params), XNN_INIT_FLAG_F16,
226     xnn_operator_type_clamp_nc_f16,
227     xnn_params.f16.clamp.ukernel,
228     clamp_op_out);
229 }
230 
xnn_create_clamp_nc_f32(size_t channels,size_t input_stride,size_t output_stride,float output_min,float output_max,uint32_t flags,xnn_operator_t * clamp_op_out)231 enum xnn_status xnn_create_clamp_nc_f32(
232     size_t channels,
233     size_t input_stride,
234     size_t output_stride,
235     float output_min,
236     float output_max,
237     uint32_t flags,
238     xnn_operator_t* clamp_op_out)
239 {
240   if (isnan(output_min)) {
241     xnn_log_error(
242       "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
243       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f32));
244     return xnn_status_invalid_parameter;
245   }
246 
247   if (isnan(output_max)) {
248     xnn_log_error(
249       "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
250       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f32));
251     return xnn_status_invalid_parameter;
252   }
253 
254   if (output_min >= output_max) {
255     xnn_log_error(
256       "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
257       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f32), output_min, output_max);
258     return xnn_status_invalid_parameter;
259   }
260 
261   const bool relu_activation = (output_max == INFINITY) && (output_min == 0.0f);
262   xnn_vunary_ukernel_function clamp_ukernel = xnn_params.f32.clamp.ukernel;
263   if (relu_activation && xnn_params.f32.relu.ukernel != NULL) {
264     clamp_ukernel = xnn_params.f32.relu.ukernel;
265   }
266 
267   union xnn_f32_minmax_params params;
268   if (xnn_params.f32.clamp.init.f32_minmax != NULL) {
269     xnn_params.f32.clamp.init.f32_minmax(&params, output_min, output_max);
270   }
271   return create_unary_elementwise_nc(
272     channels, input_stride, output_stride, flags,
273     &params, sizeof(params), XNN_INIT_FLAG_F32,
274     xnn_operator_type_clamp_nc_f32,
275     clamp_ukernel,
276     clamp_op_out);
277 }
278 
xnn_create_clamp_nc_s8(size_t channels,size_t input_stride,size_t output_stride,int8_t output_min,int8_t output_max,uint32_t flags,xnn_operator_t * clamp_op_out)279 enum xnn_status xnn_create_clamp_nc_s8(
280     size_t channels,
281     size_t input_stride,
282     size_t output_stride,
283     int8_t output_min,
284     int8_t output_max,
285     uint32_t flags,
286     xnn_operator_t* clamp_op_out)
287 {
288   if (output_min >= output_max) {
289     xnn_log_error(
290       "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: range min must be below range max",
291       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_s8), output_min, output_max);
292     return xnn_status_invalid_parameter;
293   }
294 
295   union xnn_s8_minmax_params params;
296   if (xnn_params.s8.clamp.init.s8_minmax != NULL) {
297     xnn_params.s8.clamp.init.s8_minmax(&params, output_min, output_max);
298   }
299   return create_unary_elementwise_nc(
300     channels, input_stride, output_stride, flags,
301     &params, sizeof(params), XNN_INIT_FLAG_S8,
302     xnn_operator_type_clamp_nc_s8,
303     xnn_params.s8.clamp.ukernel,
304     clamp_op_out);
305 }
306 
xnn_create_clamp_nc_u8(size_t channels,size_t input_stride,size_t output_stride,uint8_t output_min,uint8_t output_max,uint32_t flags,xnn_operator_t * clamp_op_out)307 enum xnn_status xnn_create_clamp_nc_u8(
308     size_t channels,
309     size_t input_stride,
310     size_t output_stride,
311     uint8_t output_min,
312     uint8_t output_max,
313     uint32_t flags,
314     xnn_operator_t* clamp_op_out)
315 {
316   if (output_min >= output_max) {
317     xnn_log_error(
318       "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
319       xnn_operator_type_to_string(xnn_operator_type_clamp_nc_u8), output_min, output_max);
320     return xnn_status_invalid_parameter;
321   }
322 
323   union xnn_u8_minmax_params params;
324   if (xnn_params.u8.clamp.init.u8_minmax != NULL) {
325     xnn_params.u8.clamp.init.u8_minmax(&params, output_min, output_max);
326   }
327   return create_unary_elementwise_nc(
328     channels, input_stride, output_stride, flags,
329     &params, sizeof(params), XNN_INIT_FLAG_U8,
330     xnn_operator_type_clamp_nc_u8,
331     xnn_params.u8.clamp.ukernel,
332     clamp_op_out);
333 }
334 
xnn_create_abs_nc_f16(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * abs_op_out)335 enum xnn_status xnn_create_abs_nc_f16(
336     size_t channels,
337     size_t input_stride,
338     size_t output_stride,
339     uint32_t flags,
340     xnn_operator_t* abs_op_out)
341 {
342   union xnn_f16_abs_params params;
343   if (xnn_params.f16.abs.init.f16_abs != NULL) {
344     xnn_params.f16.abs.init.f16_abs(&params);
345   }
346   return create_unary_elementwise_nc(
347     channels, input_stride, output_stride, flags,
348     &params, sizeof(params), XNN_INIT_FLAG_F16,
349     xnn_operator_type_abs_nc_f16,
350     xnn_params.f16.abs.ukernel,
351     abs_op_out);
352 }
353 
xnn_create_abs_nc_f32(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * abs_op_out)354 enum xnn_status xnn_create_abs_nc_f32(
355     size_t channels,
356     size_t input_stride,
357     size_t output_stride,
358     uint32_t flags,
359     xnn_operator_t* abs_op_out)
360 {
361   union xnn_f32_abs_params params;
362   if (xnn_params.f32.abs.init.f32_abs != NULL) {
363     xnn_params.f32.abs.init.f32_abs(&params);
364   }
365   return create_unary_elementwise_nc(
366     channels, input_stride, output_stride, flags,
367     &params, sizeof(params), XNN_INIT_FLAG_F32,
368     xnn_operator_type_abs_nc_f32,
369     xnn_params.f32.abs.ukernel,
370     abs_op_out);
371 }
372 
xnn_create_bankers_rounding_nc_f16(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * rounding_op_out)373 enum xnn_status xnn_create_bankers_rounding_nc_f16(
374     size_t channels,
375     size_t input_stride,
376     size_t output_stride,
377     uint32_t flags,
378     xnn_operator_t* rounding_op_out)
379 {
380   return create_unary_elementwise_nc(
381     channels, input_stride, output_stride, flags,
382     NULL, 0, XNN_INIT_FLAG_F16,
383     xnn_operator_type_bankers_rounding_nc_f16,
384     xnn_params.f16.rndne.ukernel,
385     rounding_op_out);
386 }
387 
xnn_create_bankers_rounding_nc_f32(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * rounding_op_out)388 enum xnn_status xnn_create_bankers_rounding_nc_f32(
389     size_t channels,
390     size_t input_stride,
391     size_t output_stride,
392     uint32_t flags,
393     xnn_operator_t* rounding_op_out)
394 {
395   union xnn_f32_rnd_params params;
396   if (xnn_params.f32.rndne.init.f32_rnd != NULL) {
397     xnn_params.f32.rndne.init.f32_rnd(&params);
398   }
399   return create_unary_elementwise_nc(
400     channels, input_stride, output_stride, flags,
401     &params, sizeof(params), XNN_INIT_FLAG_F32,
402     xnn_operator_type_bankers_rounding_nc_f32,
403     xnn_params.f32.rndne.ukernel,
404     rounding_op_out);
405 }
406 
xnn_create_ceiling_nc_f16(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * ceiling_op_out)407 enum xnn_status xnn_create_ceiling_nc_f16(
408     size_t channels,
409     size_t input_stride,
410     size_t output_stride,
411     uint32_t flags,
412     xnn_operator_t* ceiling_op_out)
413 {
414   return create_unary_elementwise_nc(
415     channels, input_stride, output_stride, flags,
416     NULL, 0, XNN_INIT_FLAG_F16,
417     xnn_operator_type_ceiling_nc_f16,
418     xnn_params.f16.rndu.ukernel,
419     ceiling_op_out);
420 }
421 
xnn_create_ceiling_nc_f32(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * ceiling_op_out)422 enum xnn_status xnn_create_ceiling_nc_f32(
423     size_t channels,
424     size_t input_stride,
425     size_t output_stride,
426     uint32_t flags,
427     xnn_operator_t* ceiling_op_out)
428 {
429   union xnn_f32_rnd_params params;
430   if (xnn_params.f32.rndu.init.f32_rnd != NULL) {
431     xnn_params.f32.rndu.init.f32_rnd(&params);
432   }
433   return create_unary_elementwise_nc(
434     channels, input_stride, output_stride, flags,
435     &params, sizeof(params), XNN_INIT_FLAG_F32,
436     xnn_operator_type_ceiling_nc_f32,
437     xnn_params.f32.rndu.ukernel,
438     ceiling_op_out);
439 }
440 
xnn_create_convert_nc_f16_f32(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * convert_op_out)441 enum xnn_status xnn_create_convert_nc_f16_f32(
442   size_t channels,
443   size_t input_stride,
444   size_t output_stride,
445   uint32_t flags,
446   xnn_operator_t* convert_op_out)
447 {
448   union xnn_f16_f32_cvt_params params;
449   if (xnn_params.vcvt.f16_to_f32.init.f16_f32_cvt != NULL) {
450     xnn_params.vcvt.f16_to_f32.init.f16_f32_cvt(&params);
451   }
452   return create_unary_elementwise_nc(
453     channels, input_stride, output_stride, flags,
454     &params, sizeof(params), XNN_INIT_FLAG_VCVT,
455     xnn_operator_type_convert_nc_f16_f32,
456     xnn_params.vcvt.f16_to_f32.ukernel,
457     convert_op_out);
458 }
459 
xnn_create_convert_nc_f32_f16(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * convert_op_out)460 enum xnn_status xnn_create_convert_nc_f32_f16(
461   size_t channels,
462   size_t input_stride,
463   size_t output_stride,
464   uint32_t flags,
465   xnn_operator_t* convert_op_out)
466 {
467   union xnn_f32_f16_cvt_params params;
468   if (xnn_params.vcvt.f32_to_f16.init.f32_f16_cvt != NULL) {
469     xnn_params.vcvt.f32_to_f16.init.f32_f16_cvt(&params);
470   }
471   return create_unary_elementwise_nc(
472     channels, input_stride, output_stride, flags,
473     &params, sizeof(params), XNN_INIT_FLAG_VCVT,
474     xnn_operator_type_convert_nc_f32_f16,
475     xnn_params.vcvt.f32_to_f16.ukernel,
476     convert_op_out);
477 }
478 
xnn_create_convert_nc_f32_qs8(size_t channels,size_t input_stride,size_t output_stride,float output_scale,int8_t output_zero_point,int8_t output_min,int8_t output_max,uint32_t flags,xnn_operator_t * convert_op_out)479 enum xnn_status xnn_create_convert_nc_f32_qs8(
480   size_t channels,
481   size_t input_stride,
482   size_t output_stride,
483   float output_scale,
484   int8_t output_zero_point,
485   int8_t output_min,
486   int8_t output_max,
487   uint32_t flags,
488   xnn_operator_t* convert_op_out)
489 {
490   if (output_scale <= 0.0f || !isnormal(output_scale)) {
491     xnn_log_error(
492       "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
493       xnn_operator_type_to_string(xnn_operator_type_convert_nc_f32_qs8), output_scale);
494     return xnn_status_invalid_parameter;
495   }
496 
497   if (output_min >= output_max) {
498     xnn_log_error(
499       "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: range min must be below range max",
500       xnn_operator_type_to_string(xnn_operator_type_convert_nc_f32_qs8), output_min, output_max);
501     return xnn_status_invalid_parameter;
502   }
503 
504   union xnn_f32_qs8_cvt_params params;
505   if (xnn_params.vcvt.f32_to_qs8.init.f32_qs8_cvt != NULL) {
506     xnn_params.vcvt.f32_to_qs8.init.f32_qs8_cvt(&params, 1.0f / output_scale, output_zero_point, output_min, output_max);
507   }
508   return create_unary_elementwise_nc(
509     channels, input_stride, output_stride, flags,
510     &params, sizeof(params), XNN_INIT_FLAG_VCVT,
511     xnn_operator_type_convert_nc_f32_qs8,
512     xnn_params.vcvt.f32_to_qs8.ukernel,
513     convert_op_out);
514 }
515 
xnn_create_convert_nc_f32_qu8(size_t channels,size_t input_stride,size_t output_stride,float output_scale,uint8_t output_zero_point,uint8_t output_min,uint8_t output_max,uint32_t flags,xnn_operator_t * convert_op_out)516 enum xnn_status xnn_create_convert_nc_f32_qu8(
517   size_t channels,
518   size_t input_stride,
519   size_t output_stride,
520   float output_scale,
521   uint8_t output_zero_point,
522   uint8_t output_min,
523   uint8_t output_max,
524   uint32_t flags,
525   xnn_operator_t* convert_op_out)
526 {
527   if (output_scale <= 0.0f || !isnormal(output_scale)) {
528     xnn_log_error(
529       "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
530       xnn_operator_type_to_string(xnn_operator_type_convert_nc_f32_qu8), output_scale);
531     return xnn_status_invalid_parameter;
532   }
533 
534   if (output_min >= output_max) {
535     xnn_log_error(
536       "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
537       xnn_operator_type_to_string(xnn_operator_type_convert_nc_f32_qu8), output_min, output_max);
538     return xnn_status_invalid_parameter;
539   }
540 
541   union xnn_f32_qu8_cvt_params params;
542   if (xnn_params.vcvt.f32_to_qu8.init.f32_qu8_cvt != NULL) {
543     xnn_params.vcvt.f32_to_qu8.init.f32_qu8_cvt(&params, 1.0f / output_scale, output_zero_point, output_min, output_max);
544   }
545   return create_unary_elementwise_nc(
546     channels, input_stride, output_stride, flags,
547     &params, sizeof(params), XNN_INIT_FLAG_VCVT,
548     xnn_operator_type_convert_nc_f32_qu8,
549     xnn_params.vcvt.f32_to_qu8.ukernel,
550     convert_op_out);
551 }
552 
xnn_create_convert_nc_qs8(size_t channels,size_t input_stride,size_t output_stride,float input_scale,int8_t input_zero_point,float output_scale,int8_t output_zero_point,uint32_t flags,xnn_operator_t * convert_op_out)553 enum xnn_status xnn_create_convert_nc_qs8(
554   size_t channels,
555   size_t input_stride,
556   size_t output_stride,
557   float input_scale,
558   int8_t input_zero_point,
559   float output_scale,
560   int8_t output_zero_point,
561   uint32_t flags,
562   xnn_operator_t* convert_op_out)
563 {
564   if (input_scale <= 0.0f || !isnormal(input_scale)) {
565     xnn_log_error(
566       "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
567       xnn_operator_type_to_string(xnn_operator_type_convert_nc_qs8), input_scale);
568     return xnn_status_invalid_parameter;
569   }
570 
571   if (output_scale <= 0.0f || !isnormal(output_scale)) {
572     xnn_log_error(
573       "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
574       xnn_operator_type_to_string(xnn_operator_type_convert_nc_qs8), output_scale);
575     return xnn_status_invalid_parameter;
576   }
577 
578   const float input_output_scale = input_scale / output_scale;
579   if (input_output_scale < 0x1.0p-8f || input_output_scale > 0x1.0p+7f) {
580     xnn_log_error(
581       "failed to create %s operator with %.7g input-to-output scale ratio: scale ratio must be in [2**-8, 2**7] range",
582       xnn_operator_type_to_string(xnn_operator_type_convert_nc_qs8), input_output_scale);
583     return xnn_status_invalid_parameter;
584   }
585 
586   union xnn_qs8_cvt_params params;
587   if (xnn_params.vcvt.qs8.init.qs8_cvt != NULL) {
588     xnn_params.vcvt.qs8.init.qs8_cvt(&params, input_output_scale, input_zero_point, output_zero_point);
589   }
590   return create_unary_elementwise_nc(
591     channels, input_stride, output_stride, flags,
592     &params, sizeof(params), XNN_INIT_FLAG_VCVT,
593     xnn_operator_type_convert_nc_qs8,
594     xnn_params.vcvt.qs8.ukernel,
595     convert_op_out);
596 }
597 
xnn_create_convert_nc_qs8_f32(size_t channels,size_t input_stride,size_t output_stride,float input_scale,int8_t input_zero_point,uint32_t flags,xnn_operator_t * convert_op_out)598 enum xnn_status xnn_create_convert_nc_qs8_f32(
599   size_t channels,
600   size_t input_stride,
601   size_t output_stride,
602   float input_scale,
603   int8_t input_zero_point,
604   uint32_t flags,
605   xnn_operator_t* convert_op_out)
606 {
607   if (input_scale <= 0.0f || !isnormal(input_scale)) {
608     xnn_log_error(
609       "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
610       xnn_operator_type_to_string(xnn_operator_type_convert_nc_qs8_f32), input_scale);
611     return xnn_status_invalid_parameter;
612   }
613 
614   union xnn_qs8_f32_cvt_params params;
615   if (xnn_params.vcvt.qs8_to_f32.init.qs8_f32_cvt != NULL) {
616     xnn_params.vcvt.qs8_to_f32.init.qs8_f32_cvt(&params, input_scale, input_zero_point);
617   }
618   return create_unary_elementwise_nc(
619     channels, input_stride, output_stride, flags,
620     &params, sizeof(params), XNN_INIT_FLAG_VCVT,
621     xnn_operator_type_convert_nc_qs8_f32,
622     xnn_params.vcvt.qs8_to_f32.ukernel,
623     convert_op_out);
624 }
625 
xnn_create_convert_nc_qu8(size_t channels,size_t input_stride,size_t output_stride,float input_scale,uint8_t input_zero_point,float output_scale,uint8_t output_zero_point,uint32_t flags,xnn_operator_t * convert_op_out)626 enum xnn_status xnn_create_convert_nc_qu8(
627   size_t channels,
628   size_t input_stride,
629   size_t output_stride,
630   float input_scale,
631   uint8_t input_zero_point,
632   float output_scale,
633   uint8_t output_zero_point,
634   uint32_t flags,
635   xnn_operator_t* convert_op_out)
636 {
637   if (input_scale <= 0.0f || !isnormal(input_scale)) {
638     xnn_log_error(
639       "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
640       xnn_operator_type_to_string(xnn_operator_type_convert_nc_qu8), input_scale);
641     return xnn_status_invalid_parameter;
642   }
643 
644   if (output_scale <= 0.0f || !isnormal(output_scale)) {
645     xnn_log_error(
646       "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
647       xnn_operator_type_to_string(xnn_operator_type_convert_nc_qu8), output_scale);
648     return xnn_status_invalid_parameter;
649   }
650 
651   const float input_output_scale = input_scale / output_scale;
652   if (input_output_scale < 0x1.0p-8f || input_output_scale > 0x1.0p+7f) {
653     xnn_log_error(
654       "failed to create %s operator with %.7g input-to-output scale ratio: scale ratio must be in [2**-8, 2**7] range",
655       xnn_operator_type_to_string(xnn_operator_type_convert_nc_qu8), input_output_scale);
656     return xnn_status_invalid_parameter;
657   }
658 
659   union xnn_qu8_cvt_params params;
660   if (xnn_params.vcvt.qu8.init.qu8_cvt != NULL) {
661     xnn_params.vcvt.qu8.init.qu8_cvt(&params, input_output_scale, input_zero_point, output_zero_point);
662   }
663   return create_unary_elementwise_nc(
664     channels, input_stride, output_stride, flags,
665     &params, sizeof(params), XNN_INIT_FLAG_VCVT,
666     xnn_operator_type_convert_nc_qu8,
667     xnn_params.vcvt.qu8.ukernel,
668     convert_op_out);
669 }
670 
xnn_create_convert_nc_qu8_f32(size_t channels,size_t input_stride,size_t output_stride,float input_scale,uint8_t input_zero_point,uint32_t flags,xnn_operator_t * convert_op_out)671 enum xnn_status xnn_create_convert_nc_qu8_f32(
672   size_t channels,
673   size_t input_stride,
674   size_t output_stride,
675   float input_scale,
676   uint8_t input_zero_point,
677   uint32_t flags,
678   xnn_operator_t* convert_op_out)
679 {
680   if (input_scale <= 0.0f || !isnormal(input_scale)) {
681     xnn_log_error(
682       "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
683       xnn_operator_type_to_string(xnn_operator_type_convert_nc_qu8_f32), input_scale);
684     return xnn_status_invalid_parameter;
685   }
686 
687   union xnn_qu8_f32_cvt_params params;
688   if (xnn_params.vcvt.qu8_to_f32.init.qu8_f32_cvt != NULL) {
689     xnn_params.vcvt.qu8_to_f32.init.qu8_f32_cvt(&params, input_scale, input_zero_point);
690   }
691   return create_unary_elementwise_nc(
692     channels, input_stride, output_stride, flags,
693     &params, sizeof(params), XNN_INIT_FLAG_VCVT,
694     xnn_operator_type_convert_nc_qu8_f32,
695     xnn_params.vcvt.qu8_to_f32.ukernel,
696     convert_op_out);
697 }
698 
xnn_create_copy_nc_x8(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * copy_op_out)699 enum xnn_status xnn_create_copy_nc_x8(
700     size_t channels,
701     size_t input_stride,
702     size_t output_stride,
703     uint32_t flags,
704     xnn_operator_t* copy_op_out)
705 {
706   return create_unary_elementwise_nc(
707     channels, input_stride, output_stride, flags,
708     NULL, 0, XNN_INIT_FLAG_X8,
709     xnn_operator_type_copy_nc_x8,
710     xnn_params.xx.copy,
711     copy_op_out);
712 }
713 
xnn_create_copy_nc_x16(size_t channels,size_t input_stride,size_t output_stride,uint32_t flags,xnn_operator_t * copy_op_out)714 enum xnn_status xnn_create_copy_nc_x16(
715     size_t channels,
716     size_t input_stride,
717     size_t output_stride,
718     uint32_t flags,
719     xnn_operator_t* copy_op_out)
720 {
721   return create_unary_elementwise_nc(
722     channels, input_stride, output_stride, flags,
723     NULL, 0, XNN_INIT_FLAG_X16,
724     xnn_operator_type_copy_nc_x16,
725     xnn_params.xx.copy,
726     copy_op_out);
727 }
728 
// Creates a 32-bit copy operator; the copy microkernel is datatype-agnostic,
// so no microkernel parameters are required.
enum xnn_status xnn_create_copy_nc_x32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* copy_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    /*params=*/NULL, /*params_size=*/0, XNN_INIT_FLAG_X32,
    xnn_operator_type_copy_nc_x32,
    xnn_params.xx.copy,
    copy_op_out);
}
743 
// Creates a half-precision ELU operator.
// alpha is rounded to half precision before validation so that the value
// checked is the one the microkernel will actually use.
enum xnn_status xnn_create_elu_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  uint32_t flags,
  xnn_operator_t* elu_op_out)
{
  const uint16_t alpha_fp16 = fp16_ieee_from_fp32_value(alpha);
  alpha = fp16_ieee_to_fp32_value(alpha_fp16);
  if (alpha <= 0.0f || !isnormal(alpha)) {
    xnn_log_error(
      "failed to create %s operator with %.7g alpha parameter: alpha must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_elu_nc_f16), alpha);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_elu_params elu_params;
  if (xnn_params.f16.elu.init.f16_elu != NULL) {
    // prescale and beta are fixed at 1.0 in half precision (0x3C00).
    xnn_params.f16.elu.init.f16_elu(
      &elu_params, UINT16_C(0x3C00), alpha_fp16, UINT16_C(0x3C00));
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &elu_params, sizeof(elu_params), XNN_INIT_FLAG_F16,
    xnn_operator_type_elu_nc_f16,
    xnn_params.f16.elu.ukernel,
    elu_op_out);
}
773 
// Creates a single-precision ELU operator.
// alpha must be a positive, finite, normalized number.
enum xnn_status xnn_create_elu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float alpha,
  uint32_t flags,
  xnn_operator_t* elu_op_out)
{
  if (alpha <= 0.0f || !isnormal(alpha)) {
    xnn_log_error(
      "failed to create %s operator with %.7g alpha parameter: alpha must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_elu_nc_f32), alpha);
    return xnn_status_invalid_parameter;
  }

  union xnn_f32_elu_params elu_params;
  if (xnn_params.f32.elu.init.f32_elu != NULL) {
    // prescale and beta are fixed at 1.0.
    xnn_params.f32.elu.init.f32_elu(&elu_params, 1.0f, alpha, 1.0f);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &elu_params, sizeof(elu_params), XNN_INIT_FLAG_F32,
    xnn_operator_type_elu_nc_f32,
    xnn_params.f32.elu.ukernel,
    elu_op_out);
}
800 
// Creates a half-precision floor operator, backed by the round-down (rndd)
// microkernel; no microkernel parameters are passed for f16.
enum xnn_status xnn_create_floor_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* floor_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    /*params=*/NULL, /*params_size=*/0, XNN_INIT_FLAG_F16,
    xnn_operator_type_floor_nc_f16,
    xnn_params.f16.rndd.ukernel,
    floor_op_out);
}
815 
// Creates a single-precision floor operator, backed by the round-down (rndd)
// microkernel.
enum xnn_status xnn_create_floor_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* floor_op_out)
{
  union xnn_f32_rnd_params rnd_params;
  if (xnn_params.f32.rndd.init.f32_rnd != NULL) {
    xnn_params.f32.rndd.init.f32_rnd(&rnd_params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &rnd_params, sizeof(rnd_params), XNN_INIT_FLAG_F32,
    xnn_operator_type_floor_nc_f32,
    xnn_params.f32.rndd.ukernel,
    floor_op_out);
}
834 
// Creates a half-precision HardSwish operator.
enum xnn_status xnn_create_hardswish_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* hardswish_op_out)
{
  union xnn_f16_hswish_params hswish_params;
  if (xnn_params.f16.hswish.init.f16_hswish != NULL) {
    xnn_params.f16.hswish.init.f16_hswish(&hswish_params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &hswish_params, sizeof(hswish_params), XNN_INIT_FLAG_F16,
    xnn_operator_type_hardswish_nc_f16,
    xnn_params.f16.hswish.ukernel,
    hardswish_op_out);
}
853 
// Creates a single-precision HardSwish operator.
enum xnn_status xnn_create_hardswish_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* hardswish_op_out)
{
  union xnn_f32_hswish_params hswish_params;
  if (xnn_params.f32.hswish.init.f32_hswish != NULL) {
    xnn_params.f32.hswish.init.f32_hswish(&hswish_params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &hswish_params, sizeof(hswish_params), XNN_INIT_FLAG_F32,
    xnn_operator_type_hardswish_nc_f32,
    xnn_params.f32.hswish.ukernel,
    hardswish_op_out);
}
872 
// Creates a half-precision Leaky ReLU operator.
// The slope is rounded to half precision before validation so that the value
// checked is the one the microkernel will actually use.
enum xnn_status xnn_create_leaky_relu_nc_f16(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out)
{
  const uint16_t negative_slope_as_half = fp16_ieee_from_fp32_value(negative_slope);
  negative_slope = fp16_ieee_to_fp32_value(negative_slope_as_half);
  if (!isfinite(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %f negative slope: finite number expected",
      // Fixed: previously logged the F32 operator type for this F16 operator.
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_f16),
      negative_slope);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_lrelu_params params;
  if (xnn_params.f16.lrelu.init.f16_lrelu != NULL) {
    xnn_params.f16.lrelu.init.f16_lrelu(&params, negative_slope_as_half);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_leaky_relu_nc_f16,
    xnn_params.f16.lrelu.ukernel,
    leaky_relu_op_out);
}
902 
// Creates a single-precision Leaky ReLU operator.
// negative_slope must be finite (zero and negative values are permitted).
enum xnn_status xnn_create_leaky_relu_nc_f32(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out)
{
  if (!isfinite(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %f negative slope: finite number expected",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_f32),
      negative_slope);
    return xnn_status_invalid_parameter;
  }

  union xnn_f32_lrelu_params lrelu_params;
  if (xnn_params.f32.lrelu.init.f32_lrelu != NULL) {
    xnn_params.f32.lrelu.init.f32_lrelu(&lrelu_params, negative_slope);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &lrelu_params, sizeof(lrelu_params), XNN_INIT_FLAG_F32,
    xnn_operator_type_leaky_relu_nc_f32,
    xnn_params.f32.lrelu.ukernel,
    leaky_relu_op_out);
}
930 
// Creates a signed 8-bit quantized Leaky ReLU operator.
// Validates the slope, both quantization scales, and the derived
// positive/negative input-to-output scale ratios before initializing the
// microkernel parameters.
enum xnn_status xnn_create_leaky_relu_nc_qs8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  int8_t input_zero_point,
  float input_scale,
  int8_t output_zero_point,
  float output_scale,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out)
{
  if (!isfinite(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %f negative slope: finite number expected",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8),
      negative_slope);
    return xnn_status_invalid_parameter;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), input_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
      // Fixed: previously logged input_scale in the output-scale error message.
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), output_scale);
    return xnn_status_invalid_parameter;
  }

  const float positive_input_output_scale = input_scale / output_scale;
  if (positive_input_output_scale < 0x1.0p-8f || positive_input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      "failed to create %s operator with %.7g positive-input-to-output scale ratio: scale ratio must be in [2**-8, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), positive_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  const float negative_input_output_scale = positive_input_output_scale * negative_slope;
  if (negative_input_output_scale < -0x1.FFFC00p+6f || negative_input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      // Fixed: message previously ended with a dangling "range and ".
      "failed to create %s operator with %.7g negative-input-to-output scale ratio: scale ratio must be in (-2**7, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), negative_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  if (fabsf(negative_input_output_scale) < 0x1.0p-8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative-input-to-output scale ratio: scale ratio must be at least 2**-8 in absolute value",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), negative_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  union xnn_qs8_lrelu_params params;
  if (xnn_params.qs8.lrelu.init.qs8_lrelu != NULL) {
    xnn_params.qs8.lrelu.init.qs8_lrelu(&params, positive_input_output_scale, negative_input_output_scale, input_zero_point, output_zero_point);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_QS8,
    xnn_operator_type_leaky_relu_nc_qs8,
    xnn_params.qs8.lrelu.ukernel,
    leaky_relu_op_out);
}
999 
// Creates an unsigned 8-bit quantized Leaky ReLU operator.
// Validates the slope, both quantization scales, and the derived
// positive/negative input-to-output scale ratios before initializing the
// microkernel parameters.
enum xnn_status xnn_create_leaky_relu_nc_qu8(
  size_t channels,
  size_t input_stride,
  size_t output_stride,
  float negative_slope,
  uint8_t input_zero_point,
  float input_scale,
  uint8_t output_zero_point,
  float output_scale,
  uint32_t flags,
  xnn_operator_t* leaky_relu_op_out)
{
  if (!isfinite(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %f negative slope: finite number expected",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      negative_slope);
    return xnn_status_invalid_parameter;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), input_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
      // Fixed: previously logged input_scale in the output-scale error message.
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), output_scale);
    return xnn_status_invalid_parameter;
  }

  const float positive_input_output_scale = input_scale / output_scale;
  if (positive_input_output_scale < 0x1.0p-8f || positive_input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      "failed to create %s operator with %.7g positive-input-to-output scale ratio: scale ratio must be in [2**-8, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), positive_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  const float negative_input_output_scale = positive_input_output_scale * negative_slope;
  if (negative_input_output_scale < -0x1.FFFC00p+6f || negative_input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      // Fixed: message previously ended with a dangling "range and ".
      "failed to create %s operator with %.7g negative-input-to-output scale ratio: scale ratio must be in (-2**7, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), negative_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  if (fabsf(negative_input_output_scale) < 0x1.0p-8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative-input-to-output scale ratio: scale ratio must be at least 2**-8 in absolute value",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), negative_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  union xnn_qu8_lrelu_params params;
  if (xnn_params.qu8.lrelu.init.qu8_lrelu != NULL) {
    xnn_params.qu8.lrelu.init.qu8_lrelu(&params, positive_input_output_scale, negative_input_output_scale, input_zero_point, output_zero_point);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_QU8,
    xnn_operator_type_leaky_relu_nc_qu8,
    xnn_params.qu8.lrelu.ukernel,
    leaky_relu_op_out);
}
1068 
// Creates a half-precision negation operator.
enum xnn_status xnn_create_negate_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* negate_op_out)
{
  union xnn_f16_neg_params neg_params;
  if (xnn_params.f16.neg.init.f16_neg != NULL) {
    xnn_params.f16.neg.init.f16_neg(&neg_params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &neg_params, sizeof(neg_params), XNN_INIT_FLAG_F16,
    xnn_operator_type_negate_nc_f16,
    xnn_params.f16.neg.ukernel,
    negate_op_out);
}
1087 
// Creates a single-precision negation operator.
enum xnn_status xnn_create_negate_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* negate_op_out)
{
  union xnn_f32_neg_params neg_params;
  if (xnn_params.f32.neg.init.f32_neg != NULL) {
    xnn_params.f32.neg.init.f32_neg(&neg_params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &neg_params, sizeof(neg_params), XNN_INIT_FLAG_F32,
    xnn_operator_type_negate_nc_f32,
    xnn_params.f32.neg.ukernel,
    negate_op_out);
}
1106 
// Creates a half-precision sigmoid operator.
// Note: create_unary_elementwise_nc already validates XNN_INIT_FLAG_F16
// (passed as datatype_init_flags below) with an identical log message and
// return code, so the separate pre-check that used to live here was
// redundant and inconsistent with every other f16 creator in this file.
enum xnn_status xnn_create_sigmoid_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  union xnn_f16_sigmoid_params params;
  if (xnn_params.f16.sigmoid.init.f16_sigmoid != NULL) {
    xnn_params.f16.sigmoid.init.f16_sigmoid(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_sigmoid_nc_f16,
    xnn_params.f16.sigmoid.ukernel,
    sigmoid_op_out);
}
1131 
// Creates a single-precision sigmoid operator.
enum xnn_status xnn_create_sigmoid_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  union xnn_f32_sigmoid_params sigmoid_params;
  if (xnn_params.f32.sigmoid.init.f32_sigmoid != NULL) {
    xnn_params.f32.sigmoid.init.f32_sigmoid(&sigmoid_params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &sigmoid_params, sizeof(sigmoid_params), XNN_INIT_FLAG_F32,
    xnn_operator_type_sigmoid_nc_f32,
    xnn_params.f32.sigmoid.ukernel,
    sigmoid_op_out);
}
1150 
// Creates a half-precision squaring operator; the f16 sqr microkernel takes
// no parameters.
enum xnn_status xnn_create_square_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* square_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    /*params=*/NULL, /*params_size=*/0, XNN_INIT_FLAG_F16,
    xnn_operator_type_square_nc_f16,
    xnn_params.f16.sqr.ukernel,
    square_op_out);
}
1165 
// Creates a single-precision squaring operator.
enum xnn_status xnn_create_square_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* square_op_out)
{
  union xnn_f32_default_params default_params;
  if (xnn_params.f32.sqr.init.f32_default != NULL) {
    xnn_params.f32.sqr.init.f32_default(&default_params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &default_params, sizeof(default_params), XNN_INIT_FLAG_F32,
    xnn_operator_type_square_nc_f32,
    xnn_params.f32.sqr.ukernel,
    square_op_out);
}
1184 
// Creates a half-precision square-root operator; the f16 sqrt microkernel
// takes no parameters.
enum xnn_status xnn_create_square_root_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sqrt_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    /*params=*/NULL, /*params_size=*/0, XNN_INIT_FLAG_F16,
    xnn_operator_type_square_root_nc_f16,
    xnn_params.f16.sqrt.ukernel,
    sqrt_op_out);
}
1199 
// Creates a single-precision square-root operator.
enum xnn_status xnn_create_square_root_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sqrt_op_out)
{
  union xnn_f32_sqrt_params sqrt_params;
  if (xnn_params.f32.sqrt.init.f32_sqrt != NULL) {
    xnn_params.f32.sqrt.init.f32_sqrt(&sqrt_params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &sqrt_params, sizeof(sqrt_params), XNN_INIT_FLAG_F32,
    xnn_operator_type_square_root_nc_f32,
    xnn_params.f32.sqrt.ukernel,
    sqrt_op_out);
}
1218 
// Creates a half-precision truncation operator, backed by the round-toward-zero
// (rndz) microkernel; no parameters are passed for f16.
enum xnn_status xnn_create_truncation_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* truncation_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    /*params=*/NULL, /*params_size=*/0, XNN_INIT_FLAG_F16,
    xnn_operator_type_truncation_nc_f16,
    xnn_params.f16.rndz.ukernel,
    truncation_op_out);
}
1233 
// Creates a single-precision truncation operator, backed by the
// round-toward-zero (rndz) microkernel.
enum xnn_status xnn_create_truncation_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* truncation_op_out)
{
  union xnn_f32_rnd_params rnd_params;
  if (xnn_params.f32.rndz.init.f32_rnd != NULL) {
    xnn_params.f32.rndz.init.f32_rnd(&rnd_params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &rnd_params, sizeof(rnd_params), XNN_INIT_FLAG_F32,
    xnn_operator_type_truncation_nc_f32,
    xnn_params.f32.rndz.ukernel,
    truncation_op_out);
}
1252 
// Binds input/output pointers and batch size to an f16 abs operator.
enum xnn_status xnn_setup_abs_nc_f16(
    xnn_operator_t abs_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    abs_op, xnn_operator_type_abs_nc_f16,
    batch_size, input, output,
    /*log2_input_size=*/1 /* uint16_t */,
    /*log2_output_size=*/1 /* uint16_t */,
    &abs_op->params.f16_abs, sizeof(abs_op->params.f16_abs),
    num_threads);
}
1268 
// Binds input/output pointers and batch size to an f32 abs operator.
enum xnn_status xnn_setup_abs_nc_f32(
    xnn_operator_t abs_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    abs_op, xnn_operator_type_abs_nc_f32,
    batch_size, input, output,
    /*log2_input_size=*/2 /* float */,
    /*log2_output_size=*/2 /* float */,
    &abs_op->params.f32_abs, sizeof(abs_op->params.f32_abs),
    num_threads);
}
1284 
// Binds input/output pointers and batch size to an f16 bankers'-rounding
// operator; this microkernel takes no parameters.
enum xnn_status xnn_setup_bankers_rounding_nc_f16(
    xnn_operator_t rounding_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    rounding_op, xnn_operator_type_bankers_rounding_nc_f16,
    batch_size, input, output,
    /*log2_input_size=*/1 /* half */,
    /*log2_output_size=*/1 /* half */,
    /*params=*/NULL, /*params_size=*/0,
    num_threads);
}
1300 
// Binds input/output pointers and batch size to an f32 bankers'-rounding
// operator.
enum xnn_status xnn_setup_bankers_rounding_nc_f32(
    xnn_operator_t rounding_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    rounding_op, xnn_operator_type_bankers_rounding_nc_f32,
    batch_size, input, output,
    /*log2_input_size=*/2 /* float */,
    /*log2_output_size=*/2 /* float */,
    &rounding_op->params.f32_rnd, sizeof(rounding_op->params.f32_rnd),
    num_threads);
}
1316 
// Binds input/output pointers and batch size to an f16 ceiling operator;
// this microkernel takes no parameters.
enum xnn_status xnn_setup_ceiling_nc_f16(
    xnn_operator_t ceiling_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    ceiling_op, xnn_operator_type_ceiling_nc_f16,
    batch_size, input, output,
    /*log2_input_size=*/1 /* half */,
    /*log2_output_size=*/1 /* half */,
    /*params=*/NULL, /*params_size=*/0,
    num_threads);
}
1332 
// Binds input/output pointers and batch size to an f32 ceiling operator.
enum xnn_status xnn_setup_ceiling_nc_f32(
    xnn_operator_t ceiling_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    ceiling_op, xnn_operator_type_ceiling_nc_f32,
    batch_size, input, output,
    /*log2_input_size=*/2 /* float */,
    /*log2_output_size=*/2 /* float */,
    &ceiling_op->params.f32_rnd, sizeof(ceiling_op->params.f32_rnd),
    num_threads);
}
1348 
// Binds input/output pointers and batch size to an f16 clamp operator.
enum xnn_status xnn_setup_clamp_nc_f16(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    clamp_op, xnn_operator_type_clamp_nc_f16,
    batch_size, input, output,
    /*log2_input_size=*/1 /* uint16_t */,
    /*log2_output_size=*/1 /* uint16_t */,
    &clamp_op->params.f16_minmax, sizeof(clamp_op->params.f16_minmax),
    num_threads);
}
1364 
// Binds input/output pointers and batch size to an f32 clamp operator.
enum xnn_status xnn_setup_clamp_nc_f32(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    clamp_op, xnn_operator_type_clamp_nc_f32,
    batch_size, input, output,
    /*log2_input_size=*/2 /* float */,
    /*log2_output_size=*/2 /* float */,
    &clamp_op->params.f32_minmax, sizeof(clamp_op->params.f32_minmax),
    num_threads);
}
1380 
// Binds input/output pointers and batch size to a signed 8-bit clamp operator.
enum xnn_status xnn_setup_clamp_nc_s8(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    clamp_op, xnn_operator_type_clamp_nc_s8,
    batch_size, input, output,
    /*log2_input_size=*/0 /* int8_t */,
    /*log2_output_size=*/0 /* int8_t */,
    &clamp_op->params.s8_minmax, sizeof(clamp_op->params.s8_minmax),
    num_threads);
}
1396 
// Binds input/output pointers and batch size to an unsigned 8-bit clamp
// operator.
enum xnn_status xnn_setup_clamp_nc_u8(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    clamp_op, xnn_operator_type_clamp_nc_u8,
    batch_size, input, output,
    /*log2_input_size=*/0 /* uint8_t */,
    /*log2_output_size=*/0 /* uint8_t */,
    &clamp_op->params.u8_minmax, sizeof(clamp_op->params.u8_minmax),
    num_threads);
}
1412 
// Binds input/output pointers and batch size to an f16 -> f32 conversion
// operator.
enum xnn_status xnn_setup_convert_nc_f16_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const void* input,
  float* output,
  pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_f16_f32,
    batch_size, input, output,
    /*log2_input_size=*/1 /* uint16_t */,
    /*log2_output_size=*/2 /* float */,
    &convert_op->params.f16_f32_cvt, sizeof(convert_op->params.f16_f32_cvt),
    num_threads);
}
1428 
// Binds input/output pointers and batch size to an f32 -> f16 conversion
// operator.
enum xnn_status xnn_setup_convert_nc_f32_f16(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  void* output,
  pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_f32_f16,
    batch_size, input, output,
    /*log2_input_size=*/2 /* float */,
    /*log2_output_size=*/1 /* uint16_t */,
    &convert_op->params.f32_f16_cvt, sizeof(convert_op->params.f32_f16_cvt),
    num_threads);
}
1444 
// Binds input/output pointers and batch size to an f32 -> qs8 quantization
// operator.
enum xnn_status xnn_setup_convert_nc_f32_qs8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  int8_t* output,
  pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_f32_qs8,
    batch_size, input, output,
    /*log2_input_size=*/2 /* float */,
    /*log2_output_size=*/0 /* int8_t */,
    &convert_op->params.f32_qs8_cvt, sizeof(convert_op->params.f32_qs8_cvt),
    num_threads);
}
1460 
// Binds input/output pointers and batch size to an f32 -> qu8 quantization
// operator.
enum xnn_status xnn_setup_convert_nc_f32_qu8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const float* input,
  uint8_t* output,
  pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_f32_qu8,
    batch_size, input, output,
    /*log2_input_size=*/2 /* float */,
    /*log2_output_size=*/0 /* uint8_t */,
    &convert_op->params.f32_qu8_cvt, sizeof(convert_op->params.f32_qu8_cvt),
    num_threads);
}
1476 
// Binds input/output pointers and batch size to a qs8 requantization
// operator.
enum xnn_status xnn_setup_convert_nc_qs8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool)
{
  const size_t num_threads = pthreadpool_get_threads_count(threadpool);
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_qs8,
    batch_size, input, output,
    /*log2_input_size=*/0 /* int8_t */,
    /*log2_output_size=*/0 /* int8_t */,
    &convert_op->params.qs8_cvt, sizeof(convert_op->params.qs8_cvt),
    num_threads);
}
1492 
// Prepares the pending QS8->F32 dequantization for execution.
enum xnn_status xnn_setup_convert_nc_qs8_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const int8_t* input,
  float* output,
  pthreadpool_t threadpool)
{
  const uint32_t log2_input_size = 0;   // int8_t is 1 byte
  const uint32_t log2_output_size = 2;  // float is 4 bytes
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_qs8_f32,
    batch_size, input, output,
    log2_input_size, log2_output_size,
    &convert_op->params.qs8_f32_cvt, sizeof(convert_op->params.qs8_f32_cvt),
    pthreadpool_get_threads_count(threadpool));
}
1508 
// Prepares the pending QU8 requantization (QU8->QU8 conversion) for execution.
enum xnn_status xnn_setup_convert_nc_qu8(
  xnn_operator_t convert_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 0;  // uint8_t elements on both sides
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_qu8,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &convert_op->params.qu8_cvt, sizeof(convert_op->params.qu8_cvt),
    pthreadpool_get_threads_count(threadpool));
}
1524 
// Prepares the pending QU8->F32 dequantization for execution.
enum xnn_status xnn_setup_convert_nc_qu8_f32(
  xnn_operator_t convert_op,
  size_t batch_size,
  const uint8_t* input,
  float* output,
  pthreadpool_t threadpool)
{
  const uint32_t log2_input_size = 0;   // uint8_t is 1 byte
  const uint32_t log2_output_size = 2;  // float is 4 bytes
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_qu8_f32,
    batch_size, input, output,
    log2_input_size, log2_output_size,
    &convert_op->params.qu8_f32_cvt, sizeof(convert_op->params.qu8_f32_cvt),
    pthreadpool_get_threads_count(threadpool));
}
1540 
// Prepares the pending 8-bit copy for execution: records batch size, I/O
// pointers, and threadpool parallelism. Element size comments fixed: x8
// elements are 1-byte (uint8_t), not uint16_t as previously (mis)labeled —
// the log2 values of 0 were already correct.
enum xnn_status xnn_setup_copy_nc_x8(
    xnn_operator_t copy_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    copy_op, xnn_operator_type_copy_nc_x8,
    batch_size, input, output,
    0 /* log2(sizeof(uint8_t)) */,
    0 /* log2(sizeof(uint8_t)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}
1556 
// Prepares the pending 16-bit copy for execution.
enum xnn_status xnn_setup_copy_nc_x16(
    xnn_operator_t copy_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (uint16_t) elements
  return setup_unary_elementwise_nc(
    copy_op, xnn_operator_type_copy_nc_x16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    /*params=*/NULL, /*params_size=*/0,
    pthreadpool_get_threads_count(threadpool));
}
1572 
// Prepares the pending 32-bit copy for execution.
enum xnn_status xnn_setup_copy_nc_x32(
    xnn_operator_t copy_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (uint32_t) elements
  return setup_unary_elementwise_nc(
    copy_op, xnn_operator_type_copy_nc_x32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    /*params=*/NULL, /*params_size=*/0,
    pthreadpool_get_threads_count(threadpool));
}
1588 
// Prepares the pending half-precision ELU for execution.
enum xnn_status xnn_setup_elu_nc_f16(
    xnn_operator_t elu_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (half) elements
  return setup_unary_elementwise_nc(
    elu_op, xnn_operator_type_elu_nc_f16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &elu_op->params.f16_elu, sizeof(elu_op->params.f16_elu),
    pthreadpool_get_threads_count(threadpool));
}
1604 
// Prepares the pending single-precision ELU for execution.
enum xnn_status xnn_setup_elu_nc_f32(
    xnn_operator_t elu_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (float) elements
  return setup_unary_elementwise_nc(
    elu_op, xnn_operator_type_elu_nc_f32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &elu_op->params.f32_elu, sizeof(elu_op->params.f32_elu),
    pthreadpool_get_threads_count(threadpool));
}
1620 
// Prepares the pending half-precision floor (round-down) for execution.
enum xnn_status xnn_setup_floor_nc_f16(
    xnn_operator_t floor_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (half) elements
  return setup_unary_elementwise_nc(
    floor_op, xnn_operator_type_floor_nc_f16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    /*params=*/NULL, /*params_size=*/0,
    pthreadpool_get_threads_count(threadpool));
}
1636 
// Prepares the pending single-precision floor (round-down) for execution.
enum xnn_status xnn_setup_floor_nc_f32(
    xnn_operator_t floor_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (float) elements
  return setup_unary_elementwise_nc(
    floor_op, xnn_operator_type_floor_nc_f32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &floor_op->params.f32_rnd, sizeof(floor_op->params.f32_rnd),
    pthreadpool_get_threads_count(threadpool));
}
1652 
// Prepares the pending half-precision HardSwish for execution.
enum xnn_status xnn_setup_hardswish_nc_f16(
    xnn_operator_t hardswish_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (half) elements
  return setup_unary_elementwise_nc(
    hardswish_op, xnn_operator_type_hardswish_nc_f16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &hardswish_op->params.f16_hswish, sizeof(hardswish_op->params.f16_hswish),
    pthreadpool_get_threads_count(threadpool));
}
1668 
// Prepares the pending single-precision HardSwish for execution.
enum xnn_status xnn_setup_hardswish_nc_f32(
    xnn_operator_t hardswish_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (float) elements
  return setup_unary_elementwise_nc(
    hardswish_op, xnn_operator_type_hardswish_nc_f32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &hardswish_op->params.f32_hswish, sizeof(hardswish_op->params.f32_hswish),
    pthreadpool_get_threads_count(threadpool));
}
1684 
// Prepares the pending half-precision Leaky ReLU for execution.
enum xnn_status xnn_setup_leaky_relu_nc_f16(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const void* input,
  void* output,
  pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (uint16_t) elements
  return setup_unary_elementwise_nc(
    leaky_relu_op, xnn_operator_type_leaky_relu_nc_f16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &leaky_relu_op->params.f16_lrelu, sizeof(leaky_relu_op->params.f16_lrelu),
    pthreadpool_get_threads_count(threadpool));
}
1700 
// Prepares the pending single-precision Leaky ReLU for execution.
enum xnn_status xnn_setup_leaky_relu_nc_f32(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const float* input,
  float* output,
  pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (float) elements
  return setup_unary_elementwise_nc(
    leaky_relu_op, xnn_operator_type_leaky_relu_nc_f32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &leaky_relu_op->params.f32_lrelu, sizeof(leaky_relu_op->params.f32_lrelu),
    pthreadpool_get_threads_count(threadpool));
}
1716 
// Prepares the pending signed-quantized (QS8) Leaky ReLU for execution.
enum xnn_status xnn_setup_leaky_relu_nc_qs8(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const int8_t* input,
  int8_t* output,
  pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 0;  // int8_t elements on both sides
  return setup_unary_elementwise_nc(
    leaky_relu_op, xnn_operator_type_leaky_relu_nc_qs8,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &leaky_relu_op->params.qs8_lrelu, sizeof(leaky_relu_op->params.qs8_lrelu),
    pthreadpool_get_threads_count(threadpool));
}
1732 
// Prepares the pending unsigned-quantized (QU8) Leaky ReLU for execution.
enum xnn_status xnn_setup_leaky_relu_nc_qu8(
  xnn_operator_t leaky_relu_op,
  size_t batch_size,
  const uint8_t* input,
  uint8_t* output,
  pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 0;  // uint8_t elements on both sides
  return setup_unary_elementwise_nc(
    leaky_relu_op, xnn_operator_type_leaky_relu_nc_qu8,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &leaky_relu_op->params.qu8_lrelu, sizeof(leaky_relu_op->params.qu8_lrelu),
    pthreadpool_get_threads_count(threadpool));
}
1748 
// Prepares the pending half-precision negation for execution.
enum xnn_status xnn_setup_negate_nc_f16(
    xnn_operator_t negate_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (uint16_t) elements
  return setup_unary_elementwise_nc(
    negate_op, xnn_operator_type_negate_nc_f16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &negate_op->params.f16_neg, sizeof(negate_op->params.f16_neg),
    pthreadpool_get_threads_count(threadpool));
}
1764 
// Prepares the pending single-precision negation for execution.
enum xnn_status xnn_setup_negate_nc_f32(
    xnn_operator_t negate_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (float) elements
  return setup_unary_elementwise_nc(
    negate_op, xnn_operator_type_negate_nc_f32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &negate_op->params.f32_neg, sizeof(negate_op->params.f32_neg),
    pthreadpool_get_threads_count(threadpool));
}
1780 
// Prepares the pending half-precision sigmoid for execution.
enum xnn_status xnn_setup_sigmoid_nc_f16(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (uint16_t) elements
  return setup_unary_elementwise_nc(
    sigmoid_op, xnn_operator_type_sigmoid_nc_f16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &sigmoid_op->params.f16_sigmoid, sizeof(sigmoid_op->params.f16_sigmoid),
    pthreadpool_get_threads_count(threadpool));
}
1796 
// Prepares the pending single-precision sigmoid for execution.
enum xnn_status xnn_setup_sigmoid_nc_f32(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (float) elements
  return setup_unary_elementwise_nc(
    sigmoid_op, xnn_operator_type_sigmoid_nc_f32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &sigmoid_op->params.f32_sigmoid, sizeof(sigmoid_op->params.f32_sigmoid),
    pthreadpool_get_threads_count(threadpool));
}
1812 
// Prepares the pending half-precision square (x*x) for execution.
enum xnn_status xnn_setup_square_nc_f16(
    xnn_operator_t square_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (uint16_t) elements
  return setup_unary_elementwise_nc(
    square_op, xnn_operator_type_square_nc_f16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    /*params=*/NULL, /*params_size=*/0,
    pthreadpool_get_threads_count(threadpool));
}
1828 
// Prepares the pending single-precision square (x*x) for execution.
enum xnn_status xnn_setup_square_nc_f32(
    xnn_operator_t square_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (float) elements
  return setup_unary_elementwise_nc(
    square_op, xnn_operator_type_square_nc_f32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &square_op->params.f32_default, sizeof(square_op->params.f32_default),
    pthreadpool_get_threads_count(threadpool));
}
1844 
// Prepares the pending half-precision square root for execution.
enum xnn_status xnn_setup_square_root_nc_f16(
    xnn_operator_t sqrt_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (half) elements
  return setup_unary_elementwise_nc(
    sqrt_op, xnn_operator_type_square_root_nc_f16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    /*params=*/NULL, /*params_size=*/0,
    pthreadpool_get_threads_count(threadpool));
}
1860 
// Prepares the pending single-precision square root for execution.
enum xnn_status xnn_setup_square_root_nc_f32(
    xnn_operator_t sqrt_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (float) elements
  return setup_unary_elementwise_nc(
    sqrt_op, xnn_operator_type_square_root_nc_f32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &sqrt_op->params.f32_sqrt, sizeof(sqrt_op->params.f32_sqrt),
    pthreadpool_get_threads_count(threadpool));
}
1876 
// Prepares the pending half-precision truncation (round-toward-zero) for execution.
enum xnn_status xnn_setup_truncation_nc_f16(
    xnn_operator_t truncation_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 1;  // 2-byte (half) elements
  return setup_unary_elementwise_nc(
    truncation_op, xnn_operator_type_truncation_nc_f16,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    /*params=*/NULL, /*params_size=*/0,
    pthreadpool_get_threads_count(threadpool));
}
1892 
// Prepares the pending single-precision truncation (round-toward-zero) for execution.
enum xnn_status xnn_setup_truncation_nc_f32(
    xnn_operator_t truncation_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  const uint32_t log2_element_size = 2;  // 4-byte (float) elements
  return setup_unary_elementwise_nc(
    truncation_op, xnn_operator_type_truncation_nc_f32,
    batch_size, input, output,
    log2_element_size, log2_element_size,
    &truncation_op->params.f32_rnd, sizeof(truncation_op->params.f32_rnd),
    pthreadpool_get_threads_count(threadpool));
}
1908