/external/XNNPACK/src/f32-vsigmoid/gen/ |
D | vsigmoid-sse2-rr2-lut64-p2-div-x8.c | 52 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8() local 56 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8() 57 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8() 75 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8() 76 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8() 77 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8() 78 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x8()
|
D | vsigmoid-sse41-rr2-lut64-p2-div-x8.c | 52 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8() local 56 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8() 57 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8() 71 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8() 72 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8() 73 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8() 74 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x8()
|
D | vsigmoid-sse2-rr2-lut64-p2-div-x12.c | 56 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12() local 61 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12() 62 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12() 89 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12() 90 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12() 91 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12() 92 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12()
|
D | vsigmoid-sse41-rr2-lut64-p2-div-x12.c | 56 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12() local 61 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12() 62 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12() 83 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12() 84 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12() 85 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12() 86 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12()
|
D | vsigmoid-sse2-rr2-lut64-p2-div-x16.c | 60 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16() local 66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16() 67 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16() 103 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16() 104 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16() 105 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16() 106 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16()
|
D | vsigmoid-sse41-rr2-lut64-p2-div-x16.c | 60 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16() local 66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16() 67 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16() 95 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16() 96 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16() 97 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16() 98 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16()
|
D | vsigmoid-sse41-rr2-lut64-p2-div-x20.c | 64 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20() local 71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20() 72 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20() 107 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20() 108 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20() 109 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20() 110 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20()
|
D | vsigmoid-sse2-rr2-lut64-p2-div-x20.c | 64 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20() local 71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20() 72 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20() 117 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20() 118 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20() 119 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20() 120 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20()
|
D | vsigmoid-sse2-rr2-lut64-p2-div-x24.c | 68 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24() local 76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24() 77 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24() 131 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24() 132 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24() 133 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24() 134 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24()
|
D | vsigmoid-sse41-rr2-lut64-p2-div-x24.c | 68 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24() local 76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24() 77 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24() 119 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24() 120 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24() 121 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24() 122 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24()
|
D | vsigmoid-neonfma-rr1-lut64-p2-div-x8.c | 50 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() local 53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8() 54 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x8()
|
D | vsigmoid-neonfma-rr1-lut2048-p1-div-x8.c | 49 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_… in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() local 52 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8() 53 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x8()
|
D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x8.c | 51 const v128_t vidx0123 = wasm_i32x4_shl(wasm_v128_and(vn0123, vindex_mask), 2); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() local 54 const uint64_t vidx01 = wasm_i64x2_extract_lane(vidx0123, 0); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8() 55 const uint64_t vidx23 = wasm_i64x2_extract_lane(vidx0123, 1); in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x8()
|
/external/XNNPACK/src/f32-velu/gen/ |
D | velu-sse2-rr2-lut16-p3-x8.c | 55 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() local 61 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 62 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 80 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 81 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 82 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8() 83 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x8()
|
D | velu-sse41-rr2-lut16-p3-x8.c | 55 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() local 61 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 62 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 76 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 77 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 78 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8() 79 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x8()
|
D | velu-sse2-rr2-lut16-p3-x12.c | 58 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() local 66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 67 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 94 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 95 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 96 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12() 97 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12()
|
D | velu-sse41-rr2-lut16-p3-x12.c | 58 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() local 66 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 67 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 88 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 89 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 90 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12() 91 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12()
|
D | velu-sse2-rr2-lut16-p3-x16.c | 61 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() local 71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 72 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 108 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 109 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 110 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16() 111 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16()
|
D | velu-sse41-rr2-lut16-p3-x16.c | 61 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() local 71 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 72 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 100 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 101 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 102 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16() 103 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16()
|
D | velu-sse2-rr2-lut16-p3-x20.c | 64 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() local 76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 77 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 122 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 123 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 124 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20() 125 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20()
|
D | velu-sse41-rr2-lut16-p3-x20.c | 64 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() local 76 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 77 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 112 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 113 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 114 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20() 115 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20()
|
D | velu-sse2-rr2-lut16-p3-x24.c | 67 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() local 81 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 82 const uint64_t vidx23 = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx0123, vidx0123)); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 136 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 137 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 138 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24() 139 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24()
|
D | velu-sse41-rr2-lut16-p3-x24.c | 67 … const __m128i vidx0123 = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn0123), vindex_mask), 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() local 81 const uint64_t vidx01 = (uint64_t) _mm_cvtsi128_si64(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 82 const uint64_t vidx23 = (uint64_t) _mm_extract_epi64(vidx0123, 1); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 124 const uint32_t vidx0 = (uint32_t) _mm_cvtsi128_si32(vidx0123); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 125 const uint32_t vidx1 = (uint32_t) _mm_extract_epi16(vidx0123, 2); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 126 const uint32_t vidx2 = (uint32_t) _mm_extract_epi16(vidx0123, 4); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24() 127 const uint32_t vidx3 = (uint32_t) _mm_extract_epi16(vidx0123, 6); in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24()
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | neonfma-rr1-lut64-p2-x8-acc2.c | 53 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8_acc2() local 54 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8_acc2() 55 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8_acc2()
|
D | neonfma-rr1-lut64-p2-x8.c | 52 …const uint64x2_t vidx0123 = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn0123), vindex_… in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8() local 53 const uint64_t vidx01 = vgetq_lane_u64(vidx0123, 0); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8() 54 const uint64_t vidx23 = vgetq_lane_u64(vidx0123, 1); in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x8()
|