// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <assert.h>
#include <inttypes.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include <fp16.h>

#include <xnnpack.h>
#include <xnnpack/allocator.h>
#include <xnnpack/log.h>
#include <xnnpack/operator.h>
#include <xnnpack/microparams-init.h>
#include <xnnpack/params.h>


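// Common helper for all unary elementwise operators: validates the
// channel/stride configuration, allocates the operator structure, and records
// the microkernel together with its packed parameters. All of the
// xnn_create_*_nc_* entry points below delegate to it.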
static enum xnn_status create_unary_elementwise_nc(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    const void* params,
    size_t params_size,
    uint32_t datatype_init_flags,
    enum xnn_operator_type operator_type,
    xnn_vunary_ukernel_function ukernel,
    xnn_operator_t* unary_elementwise_op_out)
{
  xnn_operator_t unary_elementwise_op = NULL;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_uninitialized;
  }

  if ((xnn_params.init_flags & datatype_init_flags) != datatype_init_flags) {
    xnn_log_error("failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(operator_type));
    return xnn_status_unsupported_hardware;
  }

  if (channels == 0) {
    xnn_log_error(
      "failed to create %s operator with %zu channels: number of channels must be non-zero",
      xnn_operator_type_to_string(operator_type), channels);
    return xnn_status_invalid_parameter;
  }

  if (input_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with input element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), input_stride, channels);
    return xnn_status_invalid_parameter;
  }

  if (output_stride < channels) {
    xnn_log_error(
      "failed to create %s operator with output element stride of %zu: "
      "stride must be at least as large as the number of channels (%zu)",
      xnn_operator_type_to_string(operator_type), output_stride, channels);
    return xnn_status_invalid_parameter;
  }

  unary_elementwise_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
  if (unary_elementwise_op == NULL) {
    xnn_log_error(
      "failed to allocate %zu bytes for %s operator descriptor",
      sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
    return xnn_status_out_of_memory;
  }

  unary_elementwise_op->channels = channels;
  unary_elementwise_op->input_pixel_stride = input_stride;
  unary_elementwise_op->output_pixel_stride = output_stride;
  if (params_size != 0) {
    memcpy(&unary_elementwise_op->params, params, params_size);
  }

  unary_elementwise_op->ukernel.vunary.function = ukernel;
  unary_elementwise_op->type = operator_type;
  unary_elementwise_op->flags = flags;

  unary_elementwise_op->state = xnn_run_state_invalid;

  *unary_elementwise_op_out = unary_elementwise_op;
  return xnn_status_success;
}

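// Common helper that binds a created unary elementwise operator to a batch:
// it checks the operator type, fills in the compute context, and selects
// between the contiguous and strided execution paths.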
static enum xnn_status setup_unary_elementwise_nc(
    xnn_operator_t unary_elementwise_op,
    enum xnn_operator_type expected_operator_type,
    size_t batch_size,
    const void* input,
    void* output,
    uint32_t log2_input_size,
    uint32_t log2_output_size,
    const void* params,
    size_t params_size,
    size_t num_threads)
{
  if (unary_elementwise_op->type != expected_operator_type) {
    xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
      xnn_operator_type_to_string(expected_operator_type),
      xnn_operator_type_to_string(unary_elementwise_op->type));
    return xnn_status_invalid_parameter;
  }
  unary_elementwise_op->state = xnn_run_state_invalid;

  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(unary_elementwise_op->type));
    return xnn_status_uninitialized;
  }

  if (batch_size == 0) {
    unary_elementwise_op->state = xnn_run_state_skip;
    return xnn_status_success;
  }

  const size_t channels = unary_elementwise_op->channels;
  const size_t input_stride = unary_elementwise_op->input_pixel_stride;
  const size_t output_stride = unary_elementwise_op->output_pixel_stride;

  xnn_vunary_ukernel_function ukernel = unary_elementwise_op->ukernel.vunary.function;

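  // `stride ^ channels` is zero only when the stride equals the channel
  // count, so this branch is taken when both tensors are densely packed (or
  // when there is a single batch element, in which case the strides are never
  // used). The whole batch is then processed as one contiguous byte range,
  // tiled in 4096-byte blocks when more than one thread is available.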
  if ((((input_stride ^ channels) | (output_stride ^ channels)) == 0) || batch_size == 1) {
    const size_t block_size = 4096;
    unary_elementwise_op->context.univector_contiguous = (struct univector_contiguous_context) {
      .x = input,
      .y = output,
      .log2_xsize = log2_input_size,
      .log2_ysize = log2_output_size,
      .ukernel = ukernel,
    };
    if (params_size != 0) {
      memcpy(&unary_elementwise_op->context.univector_contiguous.params, params, params_size);
    }

    const size_t range = (batch_size * channels) << log2_input_size;
    unary_elementwise_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    unary_elementwise_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_contiguous;
    unary_elementwise_op->compute.range[0] = range;
    unary_elementwise_op->compute.tile[0] = (num_threads == 1) ? range : block_size;
  } else {
    unary_elementwise_op->context.univector_strided = (struct univector_strided_context) {
      .n = channels << log2_input_size,
      .x = input,
      .x_stride = input_stride << log2_input_size,
      .y = output,
      .y_stride = output_stride << log2_output_size,
      .ukernel = ukernel,
    };
    if (params_size != 0) {
      memcpy(&unary_elementwise_op->context.univector_strided.params, params, params_size);
    }
    unary_elementwise_op->compute.type = xnn_parallelization_type_1d_tile_1d;
    unary_elementwise_op->compute.task_1d_tile_1d = (pthreadpool_task_1d_tile_1d_t) xnn_compute_univector_strided;
    unary_elementwise_op->compute.range[0] = batch_size;
    unary_elementwise_op->compute.tile[0] = (num_threads == 1) ? batch_size : 1;
  }
  unary_elementwise_op->state = xnn_run_state_ready;

  return xnn_status_success;
}

enum xnn_status xnn_create_clamp_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* clamp_op_out)
{
  if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
    xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16));
    return xnn_status_uninitialized;
  }

  if ((xnn_params.init_flags & XNN_INIT_FLAG_F16) != XNN_INIT_FLAG_F16) {
    xnn_log_error("failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16));
    return xnn_status_unsupported_hardware;
  }

  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16));
    return xnn_status_invalid_parameter;
  }

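  // Round the bounds to fp16 precision first, so that the range check below
  // validates the values the kernel will actually clamp to.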
  const uint16_t output_min_as_half = fp16_ieee_from_fp32_value(output_min);
  const uint16_t output_max_as_half = fp16_ieee_from_fp32_value(output_max);
  output_min = fp16_ieee_to_fp32_value(output_min_as_half);
  output_max = fp16_ieee_to_fp32_value(output_max_as_half);
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f16), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_minmax_params params;
  if (xnn_params.f16.clamp.init.f16_minmax != NULL) {
    xnn_params.f16.clamp.init.f16_minmax(&params, output_min_as_half, output_max_as_half);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_clamp_nc_f16,
    xnn_params.f16.clamp.ukernel,
    clamp_op_out);
}

enum xnn_status xnn_create_clamp_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float output_min,
    float output_max,
    uint32_t flags,
    xnn_operator_t* clamp_op_out)
{
  if (isnan(output_min)) {
    xnn_log_error(
      "failed to create %s operator with NaN output lower bound: lower bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f32));
    return xnn_status_invalid_parameter;
  }

  if (isnan(output_max)) {
    xnn_log_error(
      "failed to create %s operator with NaN output upper bound: upper bound must be non-NaN",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f32));
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%.7g, %.7g] output range: lower bound must be below upper bound",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_f32), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

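  // Clamping to [0, +inf) is a plain ReLU; use the specialized ReLU
  // microkernel when the configuration provides one.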
  const bool relu_activation = (output_max == INFINITY) && (output_min == 0.0f);
  xnn_vunary_ukernel_function clamp_ukernel = xnn_params.f32.clamp.ukernel;
  if (relu_activation && xnn_params.f32.relu.ukernel != NULL) {
    clamp_ukernel = xnn_params.f32.relu.ukernel;
  }

  union xnn_f32_minmax_params params;
  if (xnn_params.f32.clamp.init.f32_minmax != NULL) {
    xnn_params.f32.clamp.init.f32_minmax(&params, output_min, output_max);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_clamp_nc_f32,
    clamp_ukernel,
    clamp_op_out);
}

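// Example usage (sketch; assumes XNNPACK was initialized via xnn_initialize()
// and `threadpool` is a valid pthreadpool; error handling omitted):
//   xnn_operator_t clamp_op = NULL;
//   xnn_create_clamp_nc_f32(channels, channels, channels,
//     0.0f, 6.0f, 0 /* flags */, &clamp_op);
//   xnn_setup_clamp_nc_f32(clamp_op, batch_size, input, output, threadpool);
//   xnn_run_operator(clamp_op, threadpool);
//   xnn_delete_operator(clamp_op);
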
enum xnn_status xnn_create_clamp_nc_s8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    int8_t output_min,
    int8_t output_max,
    uint32_t flags,
    xnn_operator_t* clamp_op_out)
{
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_s8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_s8_minmax_params params;
  if (xnn_params.s8.clamp.init.s8_minmax != NULL) {
    xnn_params.s8.clamp.init.s8_minmax(&params, output_min, output_max);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_S8,
    xnn_operator_type_clamp_nc_s8,
    xnn_params.s8.clamp.ukernel,
    clamp_op_out);
}

enum xnn_status xnn_create_clamp_nc_u8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* clamp_op_out)
{
  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_clamp_nc_u8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_u8_minmax_params params;
  if (xnn_params.u8.clamp.init.u8_minmax != NULL) {
    xnn_params.u8.clamp.init.u8_minmax(&params, output_min, output_max);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_U8,
    xnn_operator_type_clamp_nc_u8,
    xnn_params.u8.clamp.ukernel,
    clamp_op_out);
}

enum xnn_status xnn_create_abs_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* abs_op_out)
{
  union xnn_f16_abs_params params;
  if (xnn_params.f16.abs.init.f16_abs != NULL) {
    xnn_params.f16.abs.init.f16_abs(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_abs_nc_f16,
    xnn_params.f16.abs.ukernel,
    abs_op_out);
}

enum xnn_status xnn_create_abs_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* abs_op_out)
{
  union xnn_f32_abs_params params;
  if (xnn_params.f32.abs.init.f32_abs != NULL) {
    xnn_params.f32.abs.init.f32_abs(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_abs_nc_f32,
    xnn_params.f32.abs.ukernel,
    abs_op_out);
}

enum xnn_status xnn_create_bankers_rounding_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* rounding_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    NULL, 0, XNN_INIT_FLAG_F16,
    xnn_operator_type_bankers_rounding_nc_f16,
    xnn_params.f16.rndne.ukernel,
    rounding_op_out);
}

enum xnn_status xnn_create_bankers_rounding_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* rounding_op_out)
{
  union xnn_f32_rnd_params params;
  if (xnn_params.f32.rndne.init.f32_rnd != NULL) {
    xnn_params.f32.rndne.init.f32_rnd(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_bankers_rounding_nc_f32,
    xnn_params.f32.rndne.ukernel,
    rounding_op_out);
}

enum xnn_status xnn_create_ceiling_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* ceiling_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    NULL, 0, XNN_INIT_FLAG_F16,
    xnn_operator_type_ceiling_nc_f16,
    xnn_params.f16.rndu.ukernel,
    ceiling_op_out);
}

enum xnn_status xnn_create_ceiling_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* ceiling_op_out)
{
  union xnn_f32_rnd_params params;
  if (xnn_params.f32.rndu.init.f32_rnd != NULL) {
    xnn_params.f32.rndu.init.f32_rnd(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_ceiling_nc_f32,
    xnn_params.f32.rndu.ukernel,
    ceiling_op_out);
}

enum xnn_status xnn_create_convert_nc_f16_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* convert_op_out)
{
  union xnn_f16_f32_cvt_params params;
  if (xnn_params.vcvt.f16_to_f32.init.f16_f32_cvt != NULL) {
    xnn_params.vcvt.f16_to_f32.init.f16_f32_cvt(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_VCVT,
    xnn_operator_type_convert_nc_f16_f32,
    xnn_params.vcvt.f16_to_f32.ukernel,
    convert_op_out);
}

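// Example usage (sketch; assumes an initialized XNNPACK, fp16 input data, and
// a valid `threadpool`; error handling omitted):
//   xnn_operator_t convert_op = NULL;
//   xnn_create_convert_nc_f16_f32(channels, channels, channels, 0 /* flags */, &convert_op);
//   xnn_setup_convert_nc_f16_f32(convert_op, batch_size, fp16_input, fp32_output, threadpool);
//   xnn_run_operator(convert_op, threadpool);
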
enum xnn_status xnn_create_convert_nc_f32_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* convert_op_out)
{
  union xnn_f32_f16_cvt_params params;
  if (xnn_params.vcvt.f32_to_f16.init.f32_f16_cvt != NULL) {
    xnn_params.vcvt.f32_to_f16.init.f32_f16_cvt(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_VCVT,
    xnn_operator_type_convert_nc_f32_f16,
    xnn_params.vcvt.f32_to_f16.ukernel,
    convert_op_out);
}

enum xnn_status xnn_create_convert_nc_f32_qs8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float output_scale,
    int8_t output_zero_point,
    int8_t output_min,
    int8_t output_max,
    uint32_t flags,
    xnn_operator_t* convert_op_out)
{
  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_f32_qs8), output_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRId8 ", %" PRId8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_f32_qs8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

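  // The conversion kernel multiplies by a scale, so the reciprocal of the
  // output scale is what gets packed into the parameters.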
  union xnn_f32_qs8_cvt_params params;
  if (xnn_params.vcvt.f32_to_qs8.init.f32_qs8_cvt != NULL) {
    xnn_params.vcvt.f32_to_qs8.init.f32_qs8_cvt(&params, 1.0f / output_scale, output_zero_point, output_min, output_max);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_VCVT,
    xnn_operator_type_convert_nc_f32_qs8,
    xnn_params.vcvt.f32_to_qs8.ukernel,
    convert_op_out);
}

enum xnn_status xnn_create_convert_nc_f32_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float output_scale,
    uint8_t output_zero_point,
    uint8_t output_min,
    uint8_t output_max,
    uint32_t flags,
    xnn_operator_t* convert_op_out)
{
  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_f32_qu8), output_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_min >= output_max) {
    xnn_log_error(
      "failed to create %s operator with [%" PRIu8 ", %" PRIu8 "] output range: range min must be below range max",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_f32_qu8), output_min, output_max);
    return xnn_status_invalid_parameter;
  }

  union xnn_f32_qu8_cvt_params params;
  if (xnn_params.vcvt.f32_to_qu8.init.f32_qu8_cvt != NULL) {
    xnn_params.vcvt.f32_to_qu8.init.f32_qu8_cvt(&params, 1.0f / output_scale, output_zero_point, output_min, output_max);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_VCVT,
    xnn_operator_type_convert_nc_f32_qu8,
    xnn_params.vcvt.f32_to_qu8.ukernel,
    convert_op_out);
}

enum xnn_status xnn_create_convert_nc_qs8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float input_scale,
    int8_t input_zero_point,
    float output_scale,
    int8_t output_zero_point,
    uint32_t flags,
    xnn_operator_t* convert_op_out)
{
  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_qs8), input_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_qs8), output_scale);
    return xnn_status_invalid_parameter;
  }

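  // Requantization folds both quantization scales into a single
  // input-to-output ratio; the kernels only support ratios in [2**-8, 2**7].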
  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input-to-output scale ratio: scale ratio must be in [2**-8, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_qs8), input_output_scale);
    return xnn_status_invalid_parameter;
  }

  union xnn_qs8_cvt_params params;
  if (xnn_params.vcvt.qs8.init.qs8_cvt != NULL) {
    xnn_params.vcvt.qs8.init.qs8_cvt(&params, input_output_scale, input_zero_point, output_zero_point);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_VCVT,
    xnn_operator_type_convert_nc_qs8,
    xnn_params.vcvt.qs8.ukernel,
    convert_op_out);
}

enum xnn_status xnn_create_convert_nc_qs8_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float input_scale,
    int8_t input_zero_point,
    uint32_t flags,
    xnn_operator_t* convert_op_out)
{
  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_qs8_f32), input_scale);
    return xnn_status_invalid_parameter;
  }

  union xnn_qs8_f32_cvt_params params;
  if (xnn_params.vcvt.qs8_to_f32.init.qs8_f32_cvt != NULL) {
    xnn_params.vcvt.qs8_to_f32.init.qs8_f32_cvt(&params, input_scale, input_zero_point);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_VCVT,
    xnn_operator_type_convert_nc_qs8_f32,
    xnn_params.vcvt.qs8_to_f32.ukernel,
    convert_op_out);
}

enum xnn_status xnn_create_convert_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float input_scale,
    uint8_t input_zero_point,
    float output_scale,
    uint8_t output_zero_point,
    uint32_t flags,
    xnn_operator_t* convert_op_out)
{
  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_qu8), input_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_qu8), output_scale);
    return xnn_status_invalid_parameter;
  }

  const float input_output_scale = input_scale / output_scale;
  if (input_output_scale < 0x1.0p-8f || input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      "failed to create %s operator with %.7g input-to-output scale ratio: scale ratio must be in [2**-8, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_qu8), input_output_scale);
    return xnn_status_invalid_parameter;
  }

  union xnn_qu8_cvt_params params;
  if (xnn_params.vcvt.qu8.init.qu8_cvt != NULL) {
    xnn_params.vcvt.qu8.init.qu8_cvt(&params, input_output_scale, input_zero_point, output_zero_point);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_VCVT,
    xnn_operator_type_convert_nc_qu8,
    xnn_params.vcvt.qu8.ukernel,
    convert_op_out);
}

enum xnn_status xnn_create_convert_nc_qu8_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float input_scale,
    uint8_t input_zero_point,
    uint32_t flags,
    xnn_operator_t* convert_op_out)
{
  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_convert_nc_qu8_f32), input_scale);
    return xnn_status_invalid_parameter;
  }

  union xnn_qu8_f32_cvt_params params;
  if (xnn_params.vcvt.qu8_to_f32.init.qu8_f32_cvt != NULL) {
    xnn_params.vcvt.qu8_to_f32.init.qu8_f32_cvt(&params, input_scale, input_zero_point);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_VCVT,
    xnn_operator_type_convert_nc_qu8_f32,
    xnn_params.vcvt.qu8_to_f32.ukernel,
    convert_op_out);
}

enum xnn_status xnn_create_copy_nc_x8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* copy_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    NULL, 0, XNN_INIT_FLAG_X8,
    xnn_operator_type_copy_nc_x8,
    xnn_params.xx.copy,
    copy_op_out);
}

enum xnn_status xnn_create_copy_nc_x16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* copy_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    NULL, 0, XNN_INIT_FLAG_X16,
    xnn_operator_type_copy_nc_x16,
    xnn_params.xx.copy,
    copy_op_out);
}

enum xnn_status xnn_create_copy_nc_x32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* copy_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    NULL, 0, XNN_INIT_FLAG_X32,
    xnn_operator_type_copy_nc_x32,
    xnn_params.xx.copy,
    copy_op_out);
}

enum xnn_status xnn_create_elu_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float alpha,
    uint32_t flags,
    xnn_operator_t* elu_op_out)
{
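  // Round alpha to fp16 precision before validating it, so the check matches
  // the value the kernel will actually use.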
  const uint16_t alpha_as_half = fp16_ieee_from_fp32_value(alpha);
  alpha = fp16_ieee_to_fp32_value(alpha_as_half);
  if (alpha <= 0.0f || !isnormal(alpha)) {
    xnn_log_error(
      "failed to create %s operator with %.7g alpha parameter: alpha must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_elu_nc_f16), alpha);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_elu_params params;
  if (xnn_params.f16.elu.init.f16_elu != NULL) {
    xnn_params.f16.elu.init.f16_elu(&params,
      UINT16_C(0x3C00) /* prescale = 1.0h */, alpha_as_half, UINT16_C(0x3C00) /* beta = 1.0h */);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_elu_nc_f16,
    xnn_params.f16.elu.ukernel,
    elu_op_out);
}

enum xnn_status xnn_create_elu_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float alpha,
    uint32_t flags,
    xnn_operator_t* elu_op_out)
{
  if (alpha <= 0.0f || !isnormal(alpha)) {
    xnn_log_error(
      "failed to create %s operator with %.7g alpha parameter: alpha must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_elu_nc_f32), alpha);
    return xnn_status_invalid_parameter;
  }

  union xnn_f32_elu_params params;
  if (xnn_params.f32.elu.init.f32_elu != NULL) {
    xnn_params.f32.elu.init.f32_elu(&params, 1.0f /* prescale */, alpha, 1.0f /* beta */);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_elu_nc_f32,
    xnn_params.f32.elu.ukernel,
    elu_op_out);
}

enum xnn_status xnn_create_floor_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* floor_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    NULL, 0, XNN_INIT_FLAG_F16,
    xnn_operator_type_floor_nc_f16,
    xnn_params.f16.rndd.ukernel,
    floor_op_out);
}

enum xnn_status xnn_create_floor_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* floor_op_out)
{
  union xnn_f32_rnd_params params;
  if (xnn_params.f32.rndd.init.f32_rnd != NULL) {
    xnn_params.f32.rndd.init.f32_rnd(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_floor_nc_f32,
    xnn_params.f32.rndd.ukernel,
    floor_op_out);
}

enum xnn_status xnn_create_hardswish_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* hardswish_op_out)
{
  union xnn_f16_hswish_params params;
  if (xnn_params.f16.hswish.init.f16_hswish != NULL) {
    xnn_params.f16.hswish.init.f16_hswish(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_hardswish_nc_f16,
    xnn_params.f16.hswish.ukernel,
    hardswish_op_out);
}

enum xnn_status xnn_create_hardswish_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* hardswish_op_out)
{
  union xnn_f32_hswish_params params;
  if (xnn_params.f32.hswish.init.f32_hswish != NULL) {
    xnn_params.f32.hswish.init.f32_hswish(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_hardswish_nc_f32,
    xnn_params.f32.hswish.ukernel,
    hardswish_op_out);
}

enum xnn_status xnn_create_leaky_relu_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float negative_slope,
    uint32_t flags,
    xnn_operator_t* leaky_relu_op_out)
{
  const uint16_t negative_slope_as_half = fp16_ieee_from_fp32_value(negative_slope);
  negative_slope = fp16_ieee_to_fp32_value(negative_slope_as_half);
  if (!isfinite(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %f negative slope: finite number expected",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_f16),
      negative_slope);
    return xnn_status_invalid_parameter;
  }

  union xnn_f16_lrelu_params params;
  if (xnn_params.f16.lrelu.init.f16_lrelu != NULL) {
    xnn_params.f16.lrelu.init.f16_lrelu(&params, negative_slope_as_half);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_leaky_relu_nc_f16,
    xnn_params.f16.lrelu.ukernel,
    leaky_relu_op_out);
}

enum xnn_status xnn_create_leaky_relu_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float negative_slope,
    uint32_t flags,
    xnn_operator_t* leaky_relu_op_out)
{
  if (!isfinite(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %f negative slope: finite number expected",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_f32),
      negative_slope);
    return xnn_status_invalid_parameter;
  }

  union xnn_f32_lrelu_params params;
  if (xnn_params.f32.lrelu.init.f32_lrelu != NULL) {
    xnn_params.f32.lrelu.init.f32_lrelu(&params, negative_slope);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_leaky_relu_nc_f32,
    xnn_params.f32.lrelu.ukernel,
    leaky_relu_op_out);
}

enum xnn_status xnn_create_leaky_relu_nc_qs8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float negative_slope,
    int8_t input_zero_point,
    float input_scale,
    int8_t output_zero_point,
    float output_scale,
    uint32_t flags,
    xnn_operator_t* leaky_relu_op_out)
{
  if (!isfinite(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %f negative slope: finite number expected",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8),
      negative_slope);
    return xnn_status_invalid_parameter;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), input_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), output_scale);
    return xnn_status_invalid_parameter;
  }

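  // Positive and negative inputs are requantized with separate ratios; the
  // negative ratio additionally folds in the slope.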
  const float positive_input_output_scale = input_scale / output_scale;
  if (positive_input_output_scale < 0x1.0p-8f || positive_input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      "failed to create %s operator with %.7g positive-input-to-output scale ratio: scale ratio must be in [2**-8, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), positive_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  const float negative_input_output_scale = positive_input_output_scale * negative_slope;
  if (negative_input_output_scale < -0x1.FFFC00p+6f || negative_input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative-input-to-output scale ratio: scale ratio must be in (-2**7, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), negative_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  if (fabsf(negative_input_output_scale) < 0x1.0p-8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative-input-to-output scale ratio: scale ratio must be at least 2**-8 in absolute value",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qs8), negative_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  union xnn_qs8_lrelu_params params;
  if (xnn_params.qs8.lrelu.init.qs8_lrelu != NULL) {
    xnn_params.qs8.lrelu.init.qs8_lrelu(&params, positive_input_output_scale, negative_input_output_scale, input_zero_point, output_zero_point);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_QS8,
    xnn_operator_type_leaky_relu_nc_qs8,
    xnn_params.qs8.lrelu.ukernel,
    leaky_relu_op_out);
}

enum xnn_status xnn_create_leaky_relu_nc_qu8(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    float negative_slope,
    uint8_t input_zero_point,
    float input_scale,
    uint8_t output_zero_point,
    float output_scale,
    uint32_t flags,
    xnn_operator_t* leaky_relu_op_out)
{
  if (!isfinite(negative_slope)) {
    xnn_log_error(
      "failed to create %s operator with %f negative slope: finite number expected",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8),
      negative_slope);
    return xnn_status_invalid_parameter;
  }

  if (input_scale <= 0.0f || !isnormal(input_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g input scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), input_scale);
    return xnn_status_invalid_parameter;
  }

  if (output_scale <= 0.0f || !isnormal(output_scale)) {
    xnn_log_error(
      "failed to create %s operator with %.7g output scale parameter: scale must be finite, normalized, and positive",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), output_scale);
    return xnn_status_invalid_parameter;
  }

  const float positive_input_output_scale = input_scale / output_scale;
  if (positive_input_output_scale < 0x1.0p-8f || positive_input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      "failed to create %s operator with %.7g positive-input-to-output scale ratio: scale ratio must be in [2**-8, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), positive_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  const float negative_input_output_scale = positive_input_output_scale * negative_slope;
  if (negative_input_output_scale < -0x1.FFFC00p+6f || negative_input_output_scale > 0x1.0p+7f) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative-input-to-output scale ratio: scale ratio must be in (-2**7, 2**7] range",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), negative_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  if (fabsf(negative_input_output_scale) < 0x1.0p-8f) {
    xnn_log_error(
      "failed to create %s operator with %.7g negative-input-to-output scale ratio: scale ratio must be at least 2**-8 in absolute value",
      xnn_operator_type_to_string(xnn_operator_type_leaky_relu_nc_qu8), negative_input_output_scale);
    return xnn_status_invalid_parameter;
  }

  union xnn_qu8_lrelu_params params;
  if (xnn_params.qu8.lrelu.init.qu8_lrelu != NULL) {
    xnn_params.qu8.lrelu.init.qu8_lrelu(&params, positive_input_output_scale, negative_input_output_scale, input_zero_point, output_zero_point);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_QU8,
    xnn_operator_type_leaky_relu_nc_qu8,
    xnn_params.qu8.lrelu.ukernel,
    leaky_relu_op_out);
}

enum xnn_status xnn_create_negate_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* negate_op_out)
{
  union xnn_f16_neg_params params;
  if (xnn_params.f16.neg.init.f16_neg != NULL) {
    xnn_params.f16.neg.init.f16_neg(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_negate_nc_f16,
    xnn_params.f16.neg.ukernel,
    negate_op_out);
}

enum xnn_status xnn_create_negate_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* negate_op_out)
{
  union xnn_f32_neg_params params;
  if (xnn_params.f32.neg.init.f32_neg != NULL) {
    xnn_params.f32.neg.init.f32_neg(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_negate_nc_f32,
    xnn_params.f32.neg.ukernel,
    negate_op_out);
}

enum xnn_status xnn_create_sigmoid_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  if ((xnn_params.init_flags & XNN_INIT_FLAG_F16) != XNN_INIT_FLAG_F16) {
    xnn_log_error("failed to create %s operator: operations on data type are not supported",
      xnn_operator_type_to_string(xnn_operator_type_sigmoid_nc_f16));
    return xnn_status_unsupported_hardware;
  }

  union xnn_f16_sigmoid_params params;
  if (xnn_params.f16.sigmoid.init.f16_sigmoid != NULL) {
    xnn_params.f16.sigmoid.init.f16_sigmoid(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F16,
    xnn_operator_type_sigmoid_nc_f16,
    xnn_params.f16.sigmoid.ukernel,
    sigmoid_op_out);
}

enum xnn_status xnn_create_sigmoid_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sigmoid_op_out)
{
  union xnn_f32_sigmoid_params params;
  if (xnn_params.f32.sigmoid.init.f32_sigmoid != NULL) {
    xnn_params.f32.sigmoid.init.f32_sigmoid(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_sigmoid_nc_f32,
    xnn_params.f32.sigmoid.ukernel,
    sigmoid_op_out);
}

enum xnn_status xnn_create_square_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* square_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    NULL, 0, XNN_INIT_FLAG_F16,
    xnn_operator_type_square_nc_f16,
    xnn_params.f16.sqr.ukernel,
    square_op_out);
}

enum xnn_status xnn_create_square_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* square_op_out)
{
  union xnn_f32_default_params params;
  if (xnn_params.f32.sqr.init.f32_default != NULL) {
    xnn_params.f32.sqr.init.f32_default(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_square_nc_f32,
    xnn_params.f32.sqr.ukernel,
    square_op_out);
}

enum xnn_status xnn_create_square_root_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sqrt_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    NULL, 0, XNN_INIT_FLAG_F16,
    xnn_operator_type_square_root_nc_f16,
    xnn_params.f16.sqrt.ukernel,
    sqrt_op_out);
}

enum xnn_status xnn_create_square_root_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* sqrt_op_out)
{
  union xnn_f32_sqrt_params params;
  if (xnn_params.f32.sqrt.init.f32_sqrt != NULL) {
    xnn_params.f32.sqrt.init.f32_sqrt(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_square_root_nc_f32,
    xnn_params.f32.sqrt.ukernel,
    sqrt_op_out);
}

enum xnn_status xnn_create_truncation_nc_f16(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* truncation_op_out)
{
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    NULL, 0, XNN_INIT_FLAG_F16,
    xnn_operator_type_truncation_nc_f16,
    xnn_params.f16.rndz.ukernel,
    truncation_op_out);
}

enum xnn_status xnn_create_truncation_nc_f32(
    size_t channels,
    size_t input_stride,
    size_t output_stride,
    uint32_t flags,
    xnn_operator_t* truncation_op_out)
{
  union xnn_f32_rnd_params params;
  if (xnn_params.f32.rndz.init.f32_rnd != NULL) {
    xnn_params.f32.rndz.init.f32_rnd(&params);
  }
  return create_unary_elementwise_nc(
    channels, input_stride, output_stride, flags,
    &params, sizeof(params), XNN_INIT_FLAG_F32,
    xnn_operator_type_truncation_nc_f32,
    xnn_params.f32.rndz.ukernel,
    truncation_op_out);
}

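// The setup wrappers below bind an operator to concrete input/output pointers
// and a batch size. The log2 element sizes they pass let the shared helper
// turn element counts into byte offsets with shifts instead of multiplies.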
enum xnn_status xnn_setup_abs_nc_f16(
    xnn_operator_t abs_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    abs_op, xnn_operator_type_abs_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    &abs_op->params.f16_abs, sizeof(abs_op->params.f16_abs),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_abs_nc_f32(
    xnn_operator_t abs_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    abs_op, xnn_operator_type_abs_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &abs_op->params.f32_abs, sizeof(abs_op->params.f32_abs),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_bankers_rounding_nc_f16(
    xnn_operator_t rounding_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    rounding_op, xnn_operator_type_bankers_rounding_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(half)) */,
    1 /* log2(sizeof(half)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_bankers_rounding_nc_f32(
    xnn_operator_t rounding_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    rounding_op, xnn_operator_type_bankers_rounding_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &rounding_op->params.f32_rnd, sizeof(rounding_op->params.f32_rnd),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_ceiling_nc_f16(
    xnn_operator_t ceiling_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    ceiling_op, xnn_operator_type_ceiling_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(half)) */,
    1 /* log2(sizeof(half)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_ceiling_nc_f32(
    xnn_operator_t ceiling_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    ceiling_op, xnn_operator_type_ceiling_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &ceiling_op->params.f32_rnd, sizeof(ceiling_op->params.f32_rnd),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_clamp_nc_f16(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    clamp_op, xnn_operator_type_clamp_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    &clamp_op->params.f16_minmax, sizeof(clamp_op->params.f16_minmax),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_clamp_nc_f32(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    clamp_op, xnn_operator_type_clamp_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &clamp_op->params.f32_minmax, sizeof(clamp_op->params.f32_minmax),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_clamp_nc_s8(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    clamp_op, xnn_operator_type_clamp_nc_s8,
    batch_size, input, output,
    0 /* log2(sizeof(int8_t)) */,
    0 /* log2(sizeof(int8_t)) */,
    &clamp_op->params.s8_minmax, sizeof(clamp_op->params.s8_minmax),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_clamp_nc_u8(
    xnn_operator_t clamp_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    clamp_op, xnn_operator_type_clamp_nc_u8,
    batch_size, input, output,
    0 /* log2(sizeof(uint8_t)) */,
    0 /* log2(sizeof(uint8_t)) */,
    &clamp_op->params.u8_minmax, sizeof(clamp_op->params.u8_minmax),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_convert_nc_f16_f32(
    xnn_operator_t convert_op,
    size_t batch_size,
    const void* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_f16_f32,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    2 /* log2(sizeof(float)) */,
    &convert_op->params.f16_f32_cvt, sizeof(convert_op->params.f16_f32_cvt),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_convert_nc_f32_f16(
    xnn_operator_t convert_op,
    size_t batch_size,
    const float* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_f32_f16,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    1 /* log2(sizeof(uint16_t)) */,
    &convert_op->params.f32_f16_cvt, sizeof(convert_op->params.f32_f16_cvt),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_convert_nc_f32_qs8(
    xnn_operator_t convert_op,
    size_t batch_size,
    const float* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_f32_qs8,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    0 /* log2(sizeof(int8_t)) */,
    &convert_op->params.f32_qs8_cvt, sizeof(convert_op->params.f32_qs8_cvt),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_convert_nc_f32_qu8(
    xnn_operator_t convert_op,
    size_t batch_size,
    const float* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_f32_qu8,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    0 /* log2(sizeof(uint8_t)) */,
    &convert_op->params.f32_qu8_cvt, sizeof(convert_op->params.f32_qu8_cvt),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_convert_nc_qs8(
    xnn_operator_t convert_op,
    size_t batch_size,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_qs8,
    batch_size, input, output,
    0 /* log2(sizeof(int8_t)) */,
    0 /* log2(sizeof(int8_t)) */,
    &convert_op->params.qs8_cvt, sizeof(convert_op->params.qs8_cvt),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_convert_nc_qs8_f32(
    xnn_operator_t convert_op,
    size_t batch_size,
    const int8_t* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    convert_op, xnn_operator_type_convert_nc_qs8_f32,
    batch_size, input, output,
    0 /* log2(sizeof(int8_t)) */,
1504 2 /* log2(sizeof(float)) */,
1505 &convert_op->params.qs8_f32_cvt, sizeof(convert_op->params.qs8_f32_cvt),
1506 pthreadpool_get_threads_count(threadpool));
1507 }
1508
xnn_setup_convert_nc_qu8(xnn_operator_t convert_op,size_t batch_size,const uint8_t * input,uint8_t * output,pthreadpool_t threadpool)1509 enum xnn_status xnn_setup_convert_nc_qu8(
1510 xnn_operator_t convert_op,
1511 size_t batch_size,
1512 const uint8_t* input,
1513 uint8_t* output,
1514 pthreadpool_t threadpool)
1515 {
1516 return setup_unary_elementwise_nc(
1517 convert_op, xnn_operator_type_convert_nc_qu8,
1518 batch_size, input, output,
1519 0 /* log2(sizeof(uint8_t)) */,
1520 0 /* log2(sizeof(uint8_t)) */,
1521 &convert_op->params.qu8_cvt, sizeof(convert_op->params.qu8_cvt),
1522 pthreadpool_get_threads_count(threadpool));
1523 }
1524
xnn_setup_convert_nc_qu8_f32(xnn_operator_t convert_op,size_t batch_size,const uint8_t * input,float * output,pthreadpool_t threadpool)1525 enum xnn_status xnn_setup_convert_nc_qu8_f32(
1526 xnn_operator_t convert_op,
1527 size_t batch_size,
1528 const uint8_t* input,
1529 float* output,
1530 pthreadpool_t threadpool)
1531 {
1532 return setup_unary_elementwise_nc(
1533 convert_op, xnn_operator_type_convert_nc_qu8_f32,
1534 batch_size, input, output,
1535 0 /* log2(sizeof(uint8_t)) */,
1536 2 /* log2(sizeof(float)) */,
1537 &convert_op->params.qu8_f32_cvt, sizeof(convert_op->params.qu8_f32_cvt),
1538 pthreadpool_get_threads_count(threadpool));
1539 }
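
/*
 * Illustrative sketch (excluded from the build, not part of the library):
 * the convert operators are the only wrappers in this file whose input and
 * output element sizes differ. A hypothetical example_* helper widening
 * FP16 data to f32 pairs xnn_create_convert_nc_f16_f32 from <xnnpack.h>
 * with the setup call defined above; it assumes xnn_initialize has
 * already succeeded.
 */
#if 0
static enum xnn_status example_convert_f16_to_f32(
    size_t channels, size_t batch_size,
    const void* fp16_input, float* fp32_output)
{
  xnn_operator_t convert_op = NULL;
  enum xnn_status status = xnn_create_convert_nc_f16_f32(
    channels, channels /* input stride */, channels /* output stride */,
    0 /* flags */, &convert_op);
  if (status != xnn_status_success) {
    return status;
  }
  // 2-byte input elements, 4-byte output elements: log2 sizes 1 and 2.
  status = xnn_setup_convert_nc_f16_f32(
    convert_op, batch_size, fp16_input, fp32_output, NULL /* threadpool */);
  if (status == xnn_status_success) {
    status = xnn_run_operator(convert_op, NULL /* threadpool */);
  }
  xnn_delete_operator(convert_op);
  return status;
}
#endif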

enum xnn_status xnn_setup_copy_nc_x8(
    xnn_operator_t copy_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    copy_op, xnn_operator_type_copy_nc_x8,
    batch_size, input, output,
    0 /* log2(sizeof(uint8_t)) */,
    0 /* log2(sizeof(uint8_t)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_copy_nc_x16(
    xnn_operator_t copy_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    copy_op, xnn_operator_type_copy_nc_x16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_copy_nc_x32(
    xnn_operator_t copy_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    copy_op, xnn_operator_type_copy_nc_x32,
    batch_size, input, output,
    2 /* log2(sizeof(uint32_t)) */,
    2 /* log2(sizeof(uint32_t)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}
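
/*
 * The copy operators above are named by element bit width rather than by
 * element type: x8, x16, and x32 move 1-, 2-, and 4-byte elements
 * respectively. xnn_setup_copy_nc_x16 therefore serves FP16 and int16
 * buffers alike, and xnn_setup_copy_nc_x32 serves float, int32, or any
 * other 4-byte type.
 */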

enum xnn_status xnn_setup_elu_nc_f16(
    xnn_operator_t elu_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    elu_op, xnn_operator_type_elu_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    &elu_op->params.f16_elu, sizeof(elu_op->params.f16_elu),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_elu_nc_f32(
    xnn_operator_t elu_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    elu_op, xnn_operator_type_elu_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &elu_op->params.f32_elu, sizeof(elu_op->params.f32_elu),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_floor_nc_f16(
    xnn_operator_t floor_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    floor_op, xnn_operator_type_floor_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_floor_nc_f32(
    xnn_operator_t floor_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    floor_op, xnn_operator_type_floor_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &floor_op->params.f32_rnd, sizeof(floor_op->params.f32_rnd),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_hardswish_nc_f16(
    xnn_operator_t hardswish_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    hardswish_op, xnn_operator_type_hardswish_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    &hardswish_op->params.f16_hswish, sizeof(hardswish_op->params.f16_hswish),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_hardswish_nc_f32(
    xnn_operator_t hardswish_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    hardswish_op, xnn_operator_type_hardswish_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &hardswish_op->params.f32_hswish, sizeof(hardswish_op->params.f32_hswish),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_leaky_relu_nc_f16(
    xnn_operator_t leaky_relu_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    leaky_relu_op, xnn_operator_type_leaky_relu_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    &leaky_relu_op->params.f16_lrelu, sizeof(leaky_relu_op->params.f16_lrelu),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_leaky_relu_nc_f32(
    xnn_operator_t leaky_relu_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    leaky_relu_op, xnn_operator_type_leaky_relu_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &leaky_relu_op->params.f32_lrelu, sizeof(leaky_relu_op->params.f32_lrelu),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_leaky_relu_nc_qs8(
    xnn_operator_t leaky_relu_op,
    size_t batch_size,
    const int8_t* input,
    int8_t* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    leaky_relu_op, xnn_operator_type_leaky_relu_nc_qs8,
    batch_size, input, output,
    0 /* log2(sizeof(int8_t)) */,
    0 /* log2(sizeof(int8_t)) */,
    &leaky_relu_op->params.qs8_lrelu, sizeof(leaky_relu_op->params.qs8_lrelu),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_leaky_relu_nc_qu8(
    xnn_operator_t leaky_relu_op,
    size_t batch_size,
    const uint8_t* input,
    uint8_t* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    leaky_relu_op, xnn_operator_type_leaky_relu_nc_qu8,
    batch_size, input, output,
    0 /* log2(sizeof(uint8_t)) */,
    0 /* log2(sizeof(uint8_t)) */,
    &leaky_relu_op->params.qu8_lrelu, sizeof(leaky_relu_op->params.qu8_lrelu),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_negate_nc_f16(
    xnn_operator_t negate_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    negate_op, xnn_operator_type_negate_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    &negate_op->params.f16_neg, sizeof(negate_op->params.f16_neg),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_negate_nc_f32(
    xnn_operator_t negate_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    negate_op, xnn_operator_type_negate_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &negate_op->params.f32_neg, sizeof(negate_op->params.f32_neg),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_sigmoid_nc_f16(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    sigmoid_op, xnn_operator_type_sigmoid_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    &sigmoid_op->params.f16_sigmoid, sizeof(sigmoid_op->params.f16_sigmoid),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_sigmoid_nc_f32(
    xnn_operator_t sigmoid_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    sigmoid_op, xnn_operator_type_sigmoid_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &sigmoid_op->params.f32_sigmoid, sizeof(sigmoid_op->params.f32_sigmoid),
    pthreadpool_get_threads_count(threadpool));
}
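
/*
 * Illustrative sketch (excluded from the build, not part of the library):
 * the setup wrappers above only record the caller's thread count; passing
 * a real threadpool lets xnn_run_operator parallelize across batches. The
 * example_* name is hypothetical; pthreadpool_create/pthreadpool_destroy
 * come from <pthreadpool.h>, and xnn_initialize is assumed to have
 * already succeeded.
 */
#if 0
static enum xnn_status example_sigmoid_f32_threaded(
    size_t channels, size_t batch_size,
    const float* input, float* output)
{
  pthreadpool_t threadpool = pthreadpool_create(4 /* threads */);
  xnn_operator_t sigmoid_op = NULL;
  enum xnn_status status = xnn_create_sigmoid_nc_f32(
    channels, channels /* input stride */, channels /* output stride */,
    0 /* flags */, &sigmoid_op);
  if (status == xnn_status_success) {
    status = xnn_setup_sigmoid_nc_f32(sigmoid_op, batch_size, input, output, threadpool);
    if (status == xnn_status_success) {
      status = xnn_run_operator(sigmoid_op, threadpool);
    }
    xnn_delete_operator(sigmoid_op);
  }
  pthreadpool_destroy(threadpool);
  return status;
}
#endif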

enum xnn_status xnn_setup_square_nc_f16(
    xnn_operator_t square_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    square_op, xnn_operator_type_square_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_square_nc_f32(
    xnn_operator_t square_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    square_op, xnn_operator_type_square_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &square_op->params.f32_default, sizeof(square_op->params.f32_default),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_square_root_nc_f16(
    xnn_operator_t sqrt_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    sqrt_op, xnn_operator_type_square_root_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_square_root_nc_f32(
    xnn_operator_t sqrt_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    sqrt_op, xnn_operator_type_square_root_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &sqrt_op->params.f32_sqrt, sizeof(sqrt_op->params.f32_sqrt),
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_truncation_nc_f16(
    xnn_operator_t truncation_op,
    size_t batch_size,
    const void* input,
    void* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    truncation_op, xnn_operator_type_truncation_nc_f16,
    batch_size, input, output,
    1 /* log2(sizeof(uint16_t)) */,
    1 /* log2(sizeof(uint16_t)) */,
    NULL, 0,
    pthreadpool_get_threads_count(threadpool));
}

enum xnn_status xnn_setup_truncation_nc_f32(
    xnn_operator_t truncation_op,
    size_t batch_size,
    const float* input,
    float* output,
    pthreadpool_t threadpool)
{
  return setup_unary_elementwise_nc(
    truncation_op, xnn_operator_type_truncation_nc_f32,
    batch_size, input, output,
    2 /* log2(sizeof(float)) */,
    2 /* log2(sizeof(float)) */,
    &truncation_op->params.f32_rnd, sizeof(truncation_op->params.f32_rnd),
    pthreadpool_get_threads_count(threadpool));
}