/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
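(References to the 64-entry lookup table xnn_table_exp2_k_over_64; each entry below lists the file, the matching source lines, and the kernel function containing them.)
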
D | neonfma-lut64-p2-x20.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20():
    104 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    105 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    106 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    107 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    108 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    109 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    110 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    111 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    112 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);
    [all …]

D | neonfma-lut64-p2-x20-acc5.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc5():
    108 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    109 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    110 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    111 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    112 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    113 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    114 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    115 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    116 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);
    [all …]

D | neonfma-lut64-p2-x20-acc2.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x20_acc2():
    105 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    106 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    107 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    108 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    109 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    110 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    111 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    112 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    113 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);
    [all …]

D | neon-lut64-p2-x20.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20():
    105 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    106 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    107 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    108 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    109 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    110 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    111 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    112 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    113 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);
    [all …]

D | neon-lut64-p2-x20-acc5.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc5():
    109 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    110 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    111 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    112 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    113 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    114 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    115 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    116 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    117 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);
    [all …]

D | neon-lut64-p2-x20-acc2.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x20_acc2():
    106 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    107 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    108 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    109 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    110 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    111 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    112 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    113 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    114 float32x2_t vlGH = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxGH]);
    [all …]

D | neonfma-lut64-p2-x16-acc4.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc4():
    100 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    101 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    102 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    103 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    104 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    105 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    106 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    107 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    109 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    [all …]

D | neon-lut64-p2-x16-acc4.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc4():
    101 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    102 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    103 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    104 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    105 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    106 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    107 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    108 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    110 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    [all …]

D | neon-lut64-p2-x16.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16():
    98 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    99 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    100 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    101 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    102 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    103 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    104 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    105 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    107 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    [all …]

D | neon-lut64-p2-x16-acc2.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x16_acc2():
    99 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    100 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    101 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    102 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    103 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    104 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    105 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    106 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    108 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    [all …]

D | neonfma-lut64-p2-x16-acc2.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16_acc2():
    98 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    99 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    100 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    101 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    102 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    103 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    104 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    105 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    107 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    [all …]

D | neonfma-lut64-p2-x16.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x16():
    97 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    98 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    99 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    100 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    101 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    102 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    103 float32x2_t vlCD = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxCD]);
    104 float32x2_t vlEF = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxEF]);
    106 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    [all …]

D | neonfma-lut64-p2-x12-acc2.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2():
    91 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    92 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    93 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    94 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    95 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    96 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    98 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    99 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    101 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    [all …]

D | neon-lut64-p2-x12.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12():
    91 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    92 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    93 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    94 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    95 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    96 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    98 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    99 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    101 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    [all …]

D | neonfma-lut64-p2-x12.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12():
    90 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    91 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    92 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    93 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    94 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    95 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    97 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    98 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    100 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    [all …]

D | neon-lut64-p2-x12-acc3.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc3():
    93 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    94 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    95 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    96 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    97 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    98 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    100 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    101 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    103 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    [all …]

D | neonfma-lut64-p2-x12-acc3.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc3():
    92 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    93 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    94 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    95 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    96 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    97 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    99 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    100 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    [all …]

D | neon-lut64-p2-x12-acc2.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x12_acc2():
    92 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    93 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    94 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    95 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    96 float32x2_t vl89 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx89]);
    97 float32x2_t vlAB = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidxAB]);
    99 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    100 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    102 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    [all …]

D | neon-lut64-p2-x8.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8():
    84 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    85 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    86 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    87 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    89 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    90 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    92 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    93 vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    174 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);
    [all …]

D | neonfma-lut64-p2-x8-acc2.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2():
    84 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    85 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    86 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    87 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    89 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    90 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    92 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    93 vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    176 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);
    [all …]

D | neon-lut64-p2-x8-acc2.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2():
    85 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    86 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    87 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    88 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    90 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    91 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    93 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    94 vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    177 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);
    [all …]

D | neonfma-lut64-p2-x8.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8():
    83 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]);
    84 float32x2_t vl23 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx23]);
    85 float32x2_t vl45 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx45]);
    86 float32x2_t vl67 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx67]);
    88 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1);
    89 vl23 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx23 >> 32)], vl23, 1);
    91 vl45 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx45 >> 32)], vl45, 1);
    92 vl67 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx67 >> 32)], vl67, 1);
    173 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);
    [all …]

D | neonfma-lut64-p2-x4.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x4():
    75 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);
    76 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]);
    77 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
    78 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1);
    151 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);
    152 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]);
    153 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
    154 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1);

D | neon-lut64-p2-x4.c |
    18 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x4():
    76 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);
    77 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]);
    78 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
    79 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1);
    152 float32x2_t vl_lo = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_lo]);
    153 float32x2_t vl_hi = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx_hi]);
    154 vl_lo = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_lo >> 32)], vl_lo, 1);
    155 vl_hi = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx_hi >> 32)], vl_hi, 1);

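Note: every NEON entry above uses the same two-lane table gather. Each 64-bit vidx value packs two 32-bit table indices; vld1_dup_f32 loads the entry for the low index into both lanes, then vld1_lane_f32 overwrites lane 1 with the entry for the high index. A minimal sketch of the pattern follows; the table contents and the names table64, gather2, and vidx are illustrative stand-ins, not XNNPACK's own:

    #include <arm_neon.h>
    #include <stdint.h>

    /* Stand-in for xnn_table_exp2_k_over_64; entry k would hold 2^(k/64). */
    static const float table64[64] = { 1.0f /* ... */ };

    /* Gather two floats whose 32-bit indices are packed into one 64-bit
     * value: the low half selects lane 0, the high half selects lane 1. */
    static inline float32x2_t gather2(const float table[64], uint64_t vidx) {
      float32x2_t vl = vld1_dup_f32(&table[(uint32_t) vidx]);      /* both lanes = entry for low index */
      vl = vld1_lane_f32(&table[(uint32_t) (vidx >> 32)], vl, 1);  /* lane 1 = entry for high index */
      return vl;
    }

Each kernel unrolls this pair of loads once per element pair (vl01, vl23, and so on); the x4/x8/x12/x16/x20 suffix is the unroll width and accN is the number of partial-sum accumulators, so the gather itself is identical across all of the files listed.
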
/external/XNNPACK/src/math/
D | exp-sse2-rr2-lut64-p2.c |
    17 extern XNN_INTERNAL const float xnn_table_exp2_k_over_64[64];
    in xnn_math_f32_exp__sse2_rr2_lut64_p2():
    73 …const __m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) &xnn_table_exp2_k_over_64 + (uin…
    74 …const __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint…
    75 …const __m128i vl1 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint…
    76 …const __m128i vl3 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint…
    82 …const __m128i vl0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + vidx0…
    83 …const __m128i vl2 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + vidx2…
    84 …const __m128i vl1 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + vidx1…
    85 …const __m128i vl3 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + vidx3…
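
SSE2 has no lane loads, so each (truncated) line above gathers a single entry: the table's base address plus a per-element offset is dereferenced as a 32-bit integer and moved into the low lane of an XMM register with _mm_cvtsi32_si128. A minimal sketch under that reading; because the lines are cut off, the offset being a byte offset and the names table64, gather1, and voffset are assumptions:

    #include <emmintrin.h>
    #include <stdint.h>

    /* Stand-in for xnn_table_exp2_k_over_64. */
    static const float table64[64] = { 1.0f /* ... */ };

    /* Load one 32-bit table entry, addressed as base + byte offset, into
     * the low lane of an XMM register (upper lanes zeroed); the four
     * per-iteration loads would then be combined back into one vector. */
    static inline __m128i gather1(uint32_t voffset) {
      return _mm_cvtsi32_si128(*((const int*) ((uintptr_t) table64 + voffset)));
    }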