/external/XNNPACK/src/math/ |
D | expminus-neonfma-rr2-lut2048-p1.c | 64 const uint64_t vidx01 = vgetq_lane_u64(vidx, 0); in xnn_math_f32_expminus__neonfma_rr2_lut2048_p1() local 66 …1 = vld1_dup_f32((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_2048 + (uint32_t) vidx01)); in xnn_math_f32_expminus__neonfma_rr2_lut2048_p1() 68 …(const float*) ((uintptr_t) xnn_table_exp2minus_k_over_2048 + (uint32_t) (vidx01 >> 32)), vl01, 1); in xnn_math_f32_expminus__neonfma_rr2_lut2048_p1()
|
D | expminus-neonfma-rr2-lut64-p2.c | 64 const uint64_t vidx01 = vgetq_lane_u64(vidx, 0); in xnn_math_f32_expminus__neonfma_rr2_lut64_p2() local 66 …l01 = vld1_dup_f32((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx01)); in xnn_math_f32_expminus__neonfma_rr2_lut64_p2() 68 …2((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01 >> 32)), vl01, 1); in xnn_math_f32_expminus__neonfma_rr2_lut64_p2()
|
D | exp-neonfma-rr2-lut64-p2.c | 70 const uint64_t vidx01 = vgetq_lane_u64(vidx, 0); in xnn_math_f32_exp__neonfma_rr2_lut64_p2() local 72 …2_t vl01 = vld1_dup_f32((const float*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint32_t) vidx01)); in xnn_math_f32_exp__neonfma_rr2_lut64_p2() 74 …vl01 = vld1_lane_f32((const float*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint32_t) (vidx01 >> … in xnn_math_f32_exp__neonfma_rr2_lut64_p2()
|
D | exp-sse2-rr2-lut64-p2.c | 71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx); in xnn_math_f32_exp__sse2_rr2_lut64_p2() local 73 …0 = _mm_cvtsi32_si128(*((const int*) ((uintptr_t) &xnn_table_exp2_k_over_64 + (uint32_t) vidx01))); in xnn_math_f32_exp__sse2_rr2_lut64_p2() 75 …_cvtsi32_si128(*((const int*) ((uintptr_t) xnn_table_exp2_k_over_64 + (uint32_t) (vidx01 >> 32)))); in xnn_math_f32_exp__sse2_rr2_lut64_p2()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neon-lut64-p2-x8.c | 78 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() local 84 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8() 89 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8()
|
D | neonfma-lut64-p2-x8-acc2.c | 78 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() local 84 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2() 89 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8_acc2()
|
D | neon-lut64-p2-x8-acc2.c | 79 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() local 85 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2() 90 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neon_lut64_p2_x8_acc2()
|
D | neonfma-lut64-p2-x8.c | 77 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() local 83 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8() 88 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x8()
|
D | neonfma-lut64-p2-x12-acc2.c | 82 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() local 91 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2_k_over_64[(uint32_t) vidx01]); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2() 98 vl01 = vld1_lane_f32(&xnn_table_exp2_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_lut64_p2_x12_acc2()
|
/external/XNNPACK/src/f32-sigmoid/gen/ |
D | neonfma-rr1-lut64-p2-div-x8.c | 53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local 55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
|
D | wasmsimd-lut64-p2-div-x8.c | 54 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x8() local 56 …st float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx01)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x8() 57 …at vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01 >> 32))); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x8()
|
D | neonfma-rr1-lut2048-p1-div-x8.c | 52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local 54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
|
D | neonfma-rr1-lut64-p2-nr2recps-x8.c | 53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() local 55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8() 62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2recps_x8()
|
D | neon-rr2-lut64-p2-nr2recps-x8.c | 54 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() local 56 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8() 63 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut64_p2_nr2recps_x8()
|
D | neon-rr2-lut2048-p1-nr2recps-x8.c | 53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() local 55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8() 62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neon_rr2_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-lut64-p2-nr1recps1fma-x8.c | 53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() local 55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8() 62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr1recps1fma_x8()
|
D | neonfma-rr1-lut2048-p1-nr2recps-x8.c | 52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() local 54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8() 61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x8()
|
D | neonfma-rr1-lut2048-p1-nr2fma-x8.c | 52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() local 54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8() 61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2fma_x8()
|
D | neonfma-rr1-lut2048-p1-nr1recps1fma-x8.c | 52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() local 54 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8() 61 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_nr1recps1fma_x8()
|
D | neonfma-rr1-lut64-p2-nr2fma-x8.c | 53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() local 55 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8() 62 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_64[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut64_p2_nr2fma_x8()
|
D | wasmsimd-lut64-p2-div-x12.c | 59 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() local 61 …st float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) vidx01)); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12() 62 …at vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_64 + (uint32_t) (vidx01 >> 32))); in xnn_f32_sigmoid_ukernel__wasmsimd_lut64_p2_div_x12()
|
D | neonfma-rr1-lut2048-p1-div-x12.c | 57 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() local 59 float32x2_t vl01 = vld1_dup_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) vidx01]); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12() 70 vl01 = vld1_lane_f32(&xnn_table_exp2minus_k_over_2048[(uint32_t) (vidx01 >> 32)], vl01, 1); in xnn_f32_sigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-wasmsimd-arm-rr2-lut16-p3-x8.c | 61 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() local 63 …st float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx01)); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8() 64 …at vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx01 >> 32))); in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x8()
|
D | velu-neonfma-rr1-lut16-p3-x8.c | 59 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() local 61 …1 = vld1_dup_s32((const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx01)); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8() 63 …(const int32_t*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx01 >> 32)), vl01, 1); in xnn_f32_velu_ukernel__neonfma_rr1_lut16_p3_x8()
|
D | velu-wasmsimd-x86-rr2-lut16-p3-x8.c | 61 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() local 63 …st float vl0 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) vidx01)); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8() 64 …at vl1 = *((const float*) ((uintptr_t) xnn_table_exp2minus_k_over_16 + (uint32_t) (vidx01 >> 32))); in xnn_f32_velu_ukernel__wasmsimd_x86_rr2_lut16_p3_x8()
|