/external/XNNPACK/src/f32-sigmoid/gen/

References to xnn_table_exp2minus_k_over_2048, per file. Line 18 of each file is the file-scope extern declaration of the table; every other match is inside that file's microkernel function, xnn_f32_sigmoid_ukernel__<file stem, with '-' replaced by '_'>().

neonfma-rr1-lut2048-p1-div-x24.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   74: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   75: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   78: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   79: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   82: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   83: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   86: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   87: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   90: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neonfma-rr1-lut2048-p1-div-x20.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   69: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   70: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   73: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   74: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   77: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   78: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   81: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   82: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   85: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neonfma-rr1-lut2048-p1-div-x16.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   64: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   65: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   68: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   69: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   72: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   73: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   76: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   77: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   79: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
  [all …]
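
The dup/lane pairs visible above (for example lines 64 and 79 of neonfma-rr1-lut2048-p1-div-x16.c) form a two-element table gather built from scalar loads: each vidx value packs two 32-bit table indices into one 64-bit word, vld1_dup_f32 fills both lanes with the low-index entry, and vld1_lane_f32 overwrites lane 1 with the high-index entry. A minimal standalone sketch of that pattern, with illustrative names (lut_gather2 and table are not XNNPACK identifiers):

  #include <arm_neon.h>
  #include <stdint.h>

  // Gather table[lo] and table[hi] into a float32x2_t, where packed_idx holds
  // lo in its low 32 bits and hi in its high 32 bits, mirroring the
  // vidx01/vidx23/... values in the kernels above.
  static inline float32x2_t lut_gather2(const float* table, uint64_t packed_idx) {
    float32x2_t v = vld1_dup_f32(&table[(uint32_t) packed_idx]);     // both lanes = table[lo]
    v = vld1_lane_f32(&table[(uint32_t) (packed_idx >> 32)], v, 1);  // lane 1 = table[hi]
    return v;
  }

In the portions elided by "[all …]", consecutive pairs are presumably combined into full four-lane vectors (e.g. with vcombine_f32) before the rest of the sigmoid computation.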

neonfma-rr1-lut2048-p1-nr2recps-x20.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   69: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   70: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   73: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   74: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   77: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   78: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   81: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   82: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   85: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neonfma-rr1-lut2048-p1-div-x12.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   59: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   60: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   63: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   64: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   67: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   68: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   70: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   71: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   73: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
  [all …]

neonfma-rr1-lut2048-p1-nr1recps1fma-x24.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   74: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   75: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   78: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   79: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   82: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   83: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   86: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   87: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   90: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neon-rr2-lut2048-p1-nr2recps-x24.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   75: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   76: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   79: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   80: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   83: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   84: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   87: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   88: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   91: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neonfma-rr1-lut2048-p1-nr2fma-x24.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   74: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   75: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   78: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   79: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   82: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   83: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   86: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   87: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   90: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neonfma-rr1-lut2048-p1-nr2recps-x24.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   74: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   75: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   78: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   79: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   82: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   83: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   86: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   87: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   90: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neonfma-rr1-lut2048-p1-nr1recps1fma-x16.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   64: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   65: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   68: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   69: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   72: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   73: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   76: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   77: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   79: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
  [all …]

neonfma-rr1-lut2048-p1-nr2recps-x16.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   64: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   65: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   68: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   69: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   72: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   73: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   76: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   77: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   79: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
  [all …]

neonfma-rr1-lut2048-p1-nr2fma-x20.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   69: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   70: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   73: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   74: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   77: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   78: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   81: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   82: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   85: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neon-rr2-lut2048-p1-nr2recps-x20.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   70: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   71: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   74: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   75: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   78: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   79: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   82: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   83: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   86: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neonfma-rr1-lut2048-p1-nr1recps1fma-x20.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   69: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   70: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   73: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   74: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   77: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   78: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   81: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   82: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   85: float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxGH]);
  [all …]

neonfma-rr1-lut2048-p1-div-x8.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   54: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   55: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   58: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   59: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   61: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   62: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   64: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
   65: vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
  112: float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]);
  [all …]

neonfma-rr1-lut2048-p1-nr2recps-x12.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   59: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   60: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   63: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   64: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   67: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   68: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   70: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   71: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   73: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
  [all …]

neonfma-rr1-lut2048-p1-nr2fma-x12.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   59: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   60: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   63: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   64: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   67: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   68: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   70: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   71: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   73: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
  [all …]

neonfma-rr1-lut2048-p1-nr1recps1fma-x12.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   59: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   60: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   63: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   64: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   67: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   68: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   70: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   71: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   73: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
  [all …]

neon-rr2-lut2048-p1-nr2recps-x12.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   60: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   61: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   64: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   65: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   68: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   69: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   71: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   72: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   74: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
  [all …]

neon-rr2-lut2048-p1-nr2recps-x16.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   65: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   66: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   69: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   70: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   73: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   74: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   77: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   78: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   80: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
  [all …]

neonfma-rr1-lut2048-p1-nr2fma-x16.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   64: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   65: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   68: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   69: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   72: float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx89]);
   73: float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxAB]);
   76: float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxCD]);
   77: float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidxEF]);
   79: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
  [all …]

neon-rr2-lut2048-p1-nr2recps-x8.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   55: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   56: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   59: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   60: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   62: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   63: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   65: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
   66: vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
  125: float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]);
  [all …]

neonfma-rr1-lut2048-p1-nr2recps-x8.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   54: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   55: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   58: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   59: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   61: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   62: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   64: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
   65: vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
  121: float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]);
  [all …]

neonfma-rr1-lut2048-p1-nr2fma-x8.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   54: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   55: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   58: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   59: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   61: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   62: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   64: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
   65: vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
  121: float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]);
  [all …]

neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c
   18: extern XNN_INTERNAL const float xnn_table_exp2minus_k_over_2048[2048];
   54: float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]);
   55: float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx23]);
   58: float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx45]);
   59: float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx67]);
   61: vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1);
   62: vl23 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx23 >> 32)], vl23, 1);
   64: vl45 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx45 >> 32)], vl45, 1);
   65: vl67 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx67 >> 32)], vl67, 1);
  121: float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx_lo]);
  [all …]
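
A note on the kernel names in this listing, as far as the convention can be read from the sources: lut2048-p1 means a 2048-entry exp2 lookup table (the table referenced above) combined with a degree-1 polynomial; rr1/rr2 appear to denote one- versus two-constant range reduction (the FMA-capable neonfma kernels get by with one); xN is the number of elements processed per main-loop iteration; and the final token picks how the sigmoid denominator 1 + exp(-|x|) is divided out. div uses vdivq_f32 directly, while nr2recps, nr2fma, and nr1recps1fma refine a vrecpeq_f32 reciprocal estimate with two Newton-Raphson steps built from VRECPS, from fused multiply-adds, or one of each. A hedged sketch of the nr2recps option only (an illustration of the technique, not the kernels' exact code):

  #include <arm_neon.h>

  // Approximate 1.0f/vd without a divide: VRECPE yields a coarse estimate,
  // and vrecpsq_f32(r, d) computes (2 - r*d), so each multiply below performs
  // one Newton-Raphson step r' = r*(2 - r*d) toward 1/d.
  static inline float32x4_t reciprocal_nr2recps(float32x4_t vd) {
    float32x4_t vr = vrecpeq_f32(vd);
    vr = vmulq_f32(vr, vrecpsq_f32(vr, vd));  // first refinement step
    vr = vmulq_f32(vr, vrecpsq_f32(vr, vd));  // second refinement step
    return vr;
  }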