
Searched refs:xnn_table_exp2minus_k_over_2048 (Results 1 – 25 of 48) sorted by relevance


/external/XNNPACK/src/f32-sigmoid/gen/
neonfma-rr1-lut2048-p1-div-x24.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
74 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
75 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
78 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
79 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
82 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
83 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
86 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
87 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
90 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x24()
[all …]
neonfma-rr1-lut2048-p1-div-x20.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
69 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
70 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
73 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
74 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
77 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
78 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
81 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
82 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
85 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x20()
[all …]
neonfma-rr1-lut2048-p1-div-x16.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
64 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
65 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
68 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
69 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
72 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
73 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
76 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
77 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
79 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x16()
[all …]
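
The matches in these kernels all use the same two-step table gather: each 64-bit vidx value packs two indices into the 2048-entry table, the low index is loaded into both lanes of a float32x2_t with vld1_dup_f32, and the high index then overwrites lane 1 with vld1_lane_f32 (NEON has no gather instruction, so the lookup is done lane by lane). A minimal stand-alone sketch of that pattern follows; the helper name gather2 and its table parameter are illustrative and not part of XNNPACK.

#include <arm_neon.h>
#include <stdint.h>

// Hypothetical helper, not XNNPACK code: load two entries of a float table
// whose indices are packed into the low and high 32 bits of vidx, the same
// way the vidx01/vidx23/... pairs are consumed in the kernels above.
static inline float32x2_t gather2(const float* table, uint64_t vidx) {
  float32x2_t vl = vld1_dup_f32(&table[(uint32_t) vidx]);       // both lanes = table[low index]
  vl = vld1_lane_f32(&table[(uint32_t) (vidx >> 32)], vl, 1);   // lane 1 = table[high index]
  return vl;
}
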
neonfma-rr1-lut2048-p1-nr2recps-x20.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
69 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
70 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
73 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
74 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
77 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
78 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
81 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
82 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
85 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x20()
[all …]
neonfma-rr1-lut2048-p1-div-x12.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
59 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
60 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
63 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
64 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
67 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
68 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
70 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
71 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
73 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
[all …]
neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
74 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
75 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
78 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
79 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
82 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
83 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
86 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
87 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
90 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x24()
[all …]
neon-rr2-lut2048-p1-nr2recps-x24.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
75 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
76 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
79 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
80 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
83 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
84 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
87 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
88 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
91 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x24()
[all …]
neonfma-rr1-lut2048-p1-nr2fma-x24.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
74 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
75 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
78 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
79 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
82 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
83 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
86 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
87 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
90 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x24()
[all …]
neonfma-rr1-lut2048-p1-nr2recps-x24.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
74 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
75 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
78 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
79 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
82 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
83 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
86 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
87 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
90 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x24()
[all …]
neonfma-rr1-lut2048-p1-nr1recps1fma-x16.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
64 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
65 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
68 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
69 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
72 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
73 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
76 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
77 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
79 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x16()
[all …]
neonfma-rr1-lut2048-p1-nr2recps-x16.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
64 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
65 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
68 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
69 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
72 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
73 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
76 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
77 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
79 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x16()
[all …]
neonfma-rr1-lut2048-p1-nr2fma-x20.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
69 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
70 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
73 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
74 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
77 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
78 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
81 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
82 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
85 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x20()
[all …]
neon-rr2-lut2048-p1-nr2recps-x20.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
70 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
71 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
74 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
75 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
78 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
79 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
82 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
83 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
86 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x20()
[all …]
neonfma-rr1-lut2048-p1-nr1recps1fma-x20.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
69 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
70 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
73 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
74 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
77 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
78 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
81 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
82 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
85 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x20()
[all …]
neonfma-rr1-lut2048-p1-div-x8.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
55 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
58 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
59 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
62 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
64 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
65 vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
112 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
[all …]
neonfma-rr1-lut2048-p1-nr2recps-x12.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
59 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
60 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
63 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
64 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
67 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
68 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
70 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
71 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
73 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12()
[all …]
neonfma-rr1-lut2048-p1-nr2fma-x12.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
59 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
60 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
63 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
64 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
67 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
68 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
70 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
71 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
73 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x12()
[all …]
neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
59 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
60 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
63 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
64 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
67 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
68 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
70 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
71 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
73 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x12()
[all …]
neon-rr2-lut2048-p1-nr2recps-x12.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
60 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
61 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
64 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
65 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
68 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
69 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
71 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
72 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
74 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x12()
[all …]
neon-rr2-lut2048-p1-nr2recps-x16.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
65 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16()
66 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16()
69 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16()
70 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16()
73 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16()
74 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16()
77 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16()
78 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16()
80 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x16()
[all …]
neonfma-rr1-lut2048-p1-nr2fma-x16.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
64 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16()
65 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16()
68 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16()
69 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16()
72 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16()
73 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16()
76 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16()
77 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16()
79 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x16()
[all …]
neon-rr2-lut2048-p1-nr2recps-x8.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
56 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
59 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
60 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
63 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
65 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
66 vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
125 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
[all …]
neonfma-rr1-lut2048-p1-nr2recps-x8.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
55 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
58 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
59 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
62 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
64 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
65 vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
121 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
[all …]
neonfma-rr1-lut2048-p1-nr2fma-x8.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
55 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
58 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
59 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
62 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
64 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
65 vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
121 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
[all …]
neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c
18 extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
55 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
58 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
59 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
62 vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
64 vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
65 vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
121 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
[all …]
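
Taken together, the kernel names (lut2048, p1) and the table name xnn_table_exp2minus_k_over_2048 point at a table-driven exp approximation: exp(-z) is rewritten as 2^(-n) * 2^(-k/2048) * (first-order correction), with the middle factor looked up in the 2048-entry table. The scalar sketch below illustrates that decomposition for a sigmoid; it is an assumption-laden illustration of the general scheme, not a transcription of the XNNPACK kernels, and the table it builds is its own, not the library's.

#include <math.h>
#include <stdint.h>
#include <stdio.h>

// Illustrative 2048-entry table of 2^(-k/2048), k = 0..2047, mirroring what
// the name xnn_table_exp2minus_k_over_2048 suggests (built locally here).
static float table[2048];

static void init_table(void) {
  for (int k = 0; k < 2048; k++) {
    table[k] = (float) pow(2.0, -k / 2048.0);
  }
}

// Approximate sigmoid(x) = 1 / (1 + exp(-x)) using the decomposition
//   exp(-z) = 2^(-t/2048),  t = z * 2048 / ln2,
//   t = 2048*n + k + d  =>  exp(-z) = 2^(-n) * 2^(-k/2048) * 2^(-d/2048),
// where 2048*n + k = rint(t) and the small last factor is handled with a
// first-order (p1-style) correction 1 - d*ln2/2048.
static float sigmoid_lut2048_p1(float x) {
  const float z = fabsf(x);                       // evaluate exp(-|x|); restore sign by symmetry
  const float t = z * 1.44269504f * 2048.0f;      // z / ln2, scaled by 2048
  const float tr = rintf(t);
  const int64_t i = (int64_t) tr;
  const int n = (int) (i >> 11);                  // power-of-two part
  const int k = (int) (i & 2047);                 // table index
  const float r = (t - tr) * 3.3845077e-4f;       // d * ln2 / 2048
  const float e = ldexpf(table[k], -n) * (1.0f - r);  // ~= exp(-|x|)
  const float s = 1.0f / (1.0f + e);
  return x >= 0.0f ? s : 1.0f - s;
}

int main(void) {
  init_table();
  for (float x = -4.0f; x <= 4.0f; x += 2.0f) {
    printf("x=%+.1f  approx=%.7f  libm=%.7f\n",
           x, sigmoid_lut2048_p1(x), 1.0f / (1.0f + expf(-x)));
  }
  return 0;
}

The div/nr2recps/nr2fma/nr1recps1fma suffixes in the file names refer to how the final 1/(1+e) reciprocal is computed in the vectorized kernels (true division versus Newton-Raphson refinement); the scalar sketch simply divides.
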
