Home
last modified time | relevance | path

Searched refs:xnn_table_exp2_k_over_64 (Results 1 – 25 of 41) sorted by relevance

12

/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dneonfma-lut64-p2-x20.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
104 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
105 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
106 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
107 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
108 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
109 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
110 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
111 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
112 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20()
[all …]
Dneonfma-lut64-p2-x20-acc5.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
108 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
109 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
110 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
111 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
112 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
113 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
114 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
115 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
116 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5()
[all …]
Dneonfma-lut64-p2-x20-acc2.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
105 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
106 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
107 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
108 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
109 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
110 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
111 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
112 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
113 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2()
[all …]
Dneon-lut64-p2-x20.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
105 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
106 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
107 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
108 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
109 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
110 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
111 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
112 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
113 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20()
[all …]
Dneon-lut64-p2-x20-acc5.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
109 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
110 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
111 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
112 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
113 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
114 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
115 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
116 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
117 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5()
[all …]
Dneon-lut64-p2-x20-acc2.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
106 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
107 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
108 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
109 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
110 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
111 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
112 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
113 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
114 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2()
[all …]
Dneonfma-lut64-p2-x16-acc4.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
100 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
101 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
102 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
103 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
104 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
105 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
106 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
107 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
109 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4()
[all …]
Dneon-lut64-p2-x16-acc4.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
101 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
102 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
103 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
104 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
105 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
106 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
107 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
108 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
110 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4()
[all …]
Dneon-lut64-p2-x16.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
98 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
99 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
100 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
101 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
102 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
103 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
104 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
105 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
107 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16()
[all …]
Dneon-lut64-p2-x16-acc2.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
99 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
100 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
101 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
102 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
103 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
104 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
105 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
106 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
108 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2()
[all …]
Dneonfma-lut64-p2-x16-acc2.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
98 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
99 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
100 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
101 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
102 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
103 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
104 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
105 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
107 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2()
[all …]
Dneonfma-lut64-p2-x16.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
97 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
98 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
99 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
100 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
101 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
102 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
103 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
104 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
106 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16()
[all …]
Dneonfma-lut64-p2-x12-acc2.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
91 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
92 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
93 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
94 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
95 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
96 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
98 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
99 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
101 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
[all …]
Dneon-lut64-p2-x12.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
91 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
92 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
93 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
94 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
95 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
96 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
98 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
99 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
101 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12()
[all …]
Dneonfma-lut64-p2-x12.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
90 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
91 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
92 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
93 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
94 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
95 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
97 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
98 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
100 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12()
[all …]
Dneon-lut64-p2-x12-acc3.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
93 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
94 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
95 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
96 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
97 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
98 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
100 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
101 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
103 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3()
[all …]
Dneonfma-lut64-p2-x12-acc3.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
92 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
93 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
94 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
95 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
96 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
97 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
99 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
100 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3()
[all …]
Dneon-lut64-p2-x12-acc2.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
92 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
93 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
94 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
95 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
96 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
97 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
99 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
100 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2()
[all …]
Dneon-lut64-p2-x8.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
84 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
85 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
86 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
87 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
89 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
90 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
92 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
93 vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
174 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
[all …]
Dneonfma-lut64-p2-x8-acc2.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
84 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
85 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
86 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
87 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
89 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
90 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
92 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
93 vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
176 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
[all …]
Dneon-lut64-p2-x8-acc2.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
85 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
86 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
88 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
90 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
94 vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
177 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
[all …]
Dneonfma-lut64-p2-x8.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
83 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
85 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
86 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
88 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
89 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
91 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
92 vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
173 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
[all …]
Dneonfma-lut64-p2-x4.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
75 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
76 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
77 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
78 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
151 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
152 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
153 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
154 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4()
Dneon-lut64-p2-x4.c18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
76 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
77 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
78 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
79 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
152 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
153 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
154 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
155 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4()
/external/XNNPACK/src/math/
Dexp-sse2-rr2-lut64-p2.c17 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
73 …const __m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) &xnn_table_exp2_k_over_64 + (uin… in xnn_math_f32_exp__sse2_rr2_lut64_p2()
74 …const __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint… in xnn_math_f32_exp__sse2_rr2_lut64_p2()
75 …const __m128i vl1 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint… in xnn_math_f32_exp__sse2_rr2_lut64_p2()
76 …const __m128i vl3 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint… in xnn_math_f32_exp__sse2_rr2_lut64_p2()
82 …const __m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + vidx0… in xnn_math_f32_exp__sse2_rr2_lut64_p2()
83 …const __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + vidx2… in xnn_math_f32_exp__sse2_rr2_lut64_p2()
84 …const __m128i vl1 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + vidx1… in xnn_math_f32_exp__sse2_rr2_lut64_p2()
85 …const __m128i vl3 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + vidx3… in xnn_math_f32_exp__sse2_rr2_lut64_p2()

12