Home
last modified time | relevance | path

Searched refs: vidx01 (Results 1 – 25 of 121) sorted by relevance

12345

/external/XNNPACK/src/math/
Dexpminus-neonfma-rr2-lut2048-p1.c64 const uint64_t vidx01 = vgetq_lane_u64(vidx, 0); in xnn_math_f32_expminus__neonfma_rr2_lut2048_p1() local
66 …1 = vld1_dup_f32((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_2048 + (uint32_t) vidx01)); in xnn_math_f32_expminus__neonfma_rr2_lut2048_p1()
68 …(const float*) ((uintptr_t) xnn_table_exp2minus_k_over_2048 + (uint32_t) (vidx01 >> 32)), vl01, 1); in xnn_math_f32_expminus__neonfma_rr2_lut2048_p1()
Dexpminus-neonfma-rr2-lut64-p2.c64 const uint64_t vidx01 = vgetq_lane_u64(vidx, 0); in xnn_math_f32_expminus__neonfma_rr2_lut64_p2() local
66 …l01 = vld1_dup_f32((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx01)); in xnn_math_f32_expminus__neonfma_rr2_lut64_p2()
68 …2((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01 >> 32)), vl01, 1); in xnn_math_f32_expminus__neonfma_rr2_lut64_p2()
Dexp-neonfma-rr2-lut64-p2.c70 const uint64_t vidx01 = vgetq_lane_u64(vidx, 0); in xnn_math_f32_exp__neonfma_rr2_lut64_p2() local
72 …2_t vl01 = vld1_dup_f32((const float*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint32_t) vidx01)); in xnn_math_f32_exp__neonfma_rr2_lut64_p2()
74 …vl01 = vld1_lane_f32((const float*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint32_t) (vidx01 >> … in xnn_math_f32_exp__neonfma_rr2_lut64_p2()
Dexp-sse2-rr2-lut64-p2.c71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_math_f32_exp__sse2_rr2_lut64_p2() local
73 …0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) &xnn_table_exp2_k_over_64 + (uint32_t) vidx01))); in xnn_math_f32_exp__sse2_rr2_lut64_p2()
75 …_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint32_t) (vidx01 >> 32)))); in xnn_math_f32_exp__sse2_rr2_lut64_p2()
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
Dneon-lut64-p2-x8.c78 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() local
84 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
89 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
Dneonfma-lut64-p2-x8-acc2.c78 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() local
84 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
89 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
Dneon-lut64-p2-x8-acc2.c79 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() local
85 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
90 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
Dneonfma-lut64-p2-x8.c77 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() local
83 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
88 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
Dneonfma-lut64-p2-x12-acc2.c82 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() local
91 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
98 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
/external/XNNPACK/src/f32-sigmoid/gen/
Dneonfma-rr1-lut64-p2-div-x8.c53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local
55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
Dwasmsimd-lut64-p2-div-x8.c54 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x8() local
56 …st float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx01)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x8()
57 …at vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01 >> 32))); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x8()
Dneonfma-rr1-lut2048-p1-div-x8.c52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local
54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
Dneonfma-rr1-lut64-p2-nr2recps-x8.c53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local
55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
Dneon-rr2-lut64-p2-nr2recps-x8.c54 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() local
56 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8()
63 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8()
Dneon-rr2-lut2048-p1-nr2recps-x8.c53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local
55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
Dneonfma-rr1-lut64-p2-nr1recps1fma-x8.c53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local
55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
Dneonfma-rr1-lut2048-p1-nr2recps-x8.c52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local
54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
Dneonfma-rr1-lut2048-p1-nr2fma-x8.c52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local
54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
Dneonfma-rr1-lut2048-p1-nr1recps1fma-x8.c52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local
54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
Dneonfma-rr1-lut64-p2-nr2fma-x8.c53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() local
55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8()
62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8()
Dwasmsimd-lut64-p2-div-x12.c59 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() local
61 …st float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx01)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12()
62 …at vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01 >> 32))); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12()
Dneonfma-rr1-lut2048-p1-div-x12.c57 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() local
59 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
70 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
/external/XNNPACK/src/f32-velu/gen/
Dvelu-wasmsimd-arm-rr2-lut16-p3-x8.c61 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() local
63 …st float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx01)); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8()
64 …at vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx01 >> 32))); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8()
Dvelu-neonfma-rr1-lut16-p3-x8.c59 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() local
61 …1 = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx01)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8()
63 …(const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx01 >> 32)), vl01, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8()
Dvelu-wasmsimd-x86-rr2-lut16-p3-x8.c61 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() local
63 …st float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx01)); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8()
64 …at vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx01 >> 32))); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8()

12345