1 // Auto-generated file. Do not edit!
2 // Template: src/f32-hswish/scalar.c.in
3 // Generator: tools/xngen
4 //
5 // Copyright 2019 Google LLC
6 //
7 // This source code is licensed under the BSD-style license found in the
8 // LICENSE file in the root directory of this source tree.
9
10 #include <assert.h>
11
12 #include <xnnpack/common.h>
13 #include <xnnpack/math.h>
14 #include <xnnpack/hswish.h>
15
16
xnn_f32_hswish_ukernel__scalar_x4(size_t n,const float * x,float * y,const union xnn_f32_hswish_params params[restrict XNN_MIN_ELEMENTS (1)])17 void xnn_f32_hswish_ukernel__scalar_x4(
18 size_t n,
19 const float* x,
20 float* y,
21 const union xnn_f32_hswish_params params[restrict XNN_MIN_ELEMENTS(1)])
22 {
23 assert(n != 0);
24 assert(n % sizeof(float) == 0);
25
26 const float vsixth = params->scalar.sixth;
27 const float vthree = params->scalar.three;
28 const float vsix = params->scalar.six;
29 const float vzero = 0.0f;
30 assert(vthree == 3.0f);
31 assert(vsix == 6.0f);
32
33 for (; n >= 4 * sizeof(float); n -= 4 * sizeof(float)) {
34 float vx0 = x[0];
35 float vx1 = x[1];
36 float vx2 = x[2];
37 float vx3 = x[3];
38 x += 4;
39
40 float vacc0 = vx0 + vthree;
41 vx0 *= vsixth;
42 float vacc1 = vx1 + vthree;
43 vx1 *= vsixth;
44 float vacc2 = vx2 + vthree;
45 vx2 *= vsixth;
46 float vacc3 = vx3 + vthree;
47 vx3 *= vsixth;
48
49 vacc0 = math_max_f32(vacc0, vzero);
50 vacc1 = math_max_f32(vacc1, vzero);
51 vacc2 = math_max_f32(vacc2, vzero);
52 vacc3 = math_max_f32(vacc3, vzero);
53
54 vacc0 = math_min_f32(vacc0, vsix);
55 vacc1 = math_min_f32(vacc1, vsix);
56 vacc2 = math_min_f32(vacc2, vsix);
57 vacc3 = math_min_f32(vacc3, vsix);
58
59 vacc0 *= vx0;
60 vacc1 *= vx1;
61 vacc2 *= vx2;
62 vacc3 *= vx3;
63
64 y[0] = vacc0;
65 y[1] = vacc1;
66 y[2] = vacc2;
67 y[3] = vacc3;
68 y += 4;
69 }
70 if XNN_UNLIKELY(n != 0) {
71 do {
72 float vx = *x++;
73 float vacc = vx + vthree;
74 vx *= vsixth;
75 vacc = math_max_f32(vacc, vzero);
76 vacc = math_min_f32(vacc, vsix);
77 vacc *= vx;
78 *y++ = vacc;
79 n -= sizeof(float);
80 } while (n != 0);
81 }
82 }
83