/external/XNNPACK/src/f32-vsigmoid/gen/
D | vsigmoid-sse2-rr2-lut64-p2-div-x12.c | in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x12():
     58  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     79  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     80  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    111  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    112  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    113  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    114  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | vsigmoid-sse41-rr2-lut64-p2-div-x12.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x12():
     58  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     75  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     76  const uint64_t vidxAB = (uint64_t) _mm_extract_epi64(vidx89AB, 1);
    101  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    102  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    103  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    104  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
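The x12 pair above shows the only difference visible in these hits between the SSE2 and SSE4.1 code paths: how the upper two packed indices leave the vector. SSE4.1 extracts the high 64-bit lane directly with _mm_extract_epi64; SSE2 has no such instruction, so it first swaps the high half into the low lane with _mm_unpackhi_epi64 and reuses _mm_cvtsi128_si64. A minimal sketch of both paths (x86-64 only, since _mm_cvtsi128_si64 does not exist in 32-bit mode; an illustration, not the generated XNNPACK source):

    #include <emmintrin.h>   // SSE2
    #ifdef __SSE4_1__
    #include <smmintrin.h>   // SSE4.1: _mm_extract_epi64
    #endif
    #include <stdint.h>

    // Pull both 64-bit halves of four packed 32-bit indices into scalars.
    static inline void extract_index_pairs(__m128i vidx, uint64_t* lo, uint64_t* hi) {
      *lo = (uint64_t) _mm_cvtsi128_si64(vidx);                            // lanes 0-1
    #ifdef __SSE4_1__
      *hi = (uint64_t) _mm_extract_epi64(vidx, 1);                         // lanes 2-3, one instruction
    #else
      *hi = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx, vidx));  // SSE2 fallback
    #endif
    }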
D | vsigmoid-sse2-rr2-lut64-p2-div-x16.c | in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x16():
     62  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     84  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     85  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    125  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    126  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    127  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    128  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | vsigmoid-sse41-rr2-lut64-p2-div-x16.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x16():
     62  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     80  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     81  const uint64_t vidxAB = (uint64_t) _mm_extract_epi64(vidx89AB, 1);
    113  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    114  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    115  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    116  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | vsigmoid-sse41-rr2-lut64-p2-div-x20.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x20():
     66  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     85  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     86  const uint64_t vidxAB = (uint64_t) _mm_extract_epi64(vidx89AB, 1);
    125  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    126  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    127  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    128  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | vsigmoid-sse2-rr2-lut64-p2-div-x20.c | in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x20():
     66  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     89  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     90  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    139  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    140  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    141  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    142  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | vsigmoid-sse2-rr2-lut64-p2-div-x24.c | in xnn_f32_vsigmoid_ukernel__sse2_rr2_lut64_p2_div_x24():
     70  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     94  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     95  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    153  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    154  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    155  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    156  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | vsigmoid-sse41-rr2-lut64-p2-div-x24.c | in xnn_f32_vsigmoid_ukernel__sse41_rr2_lut64_p2_div_x24():
     70  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     90  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     91  const uint64_t vidxAB = (uint64_t) _mm_extract_epi64(vidx89AB, 1);
    137  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    138  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    139  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    140  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
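Note that the scalar remainder path is identical in the SSE2 and SSE4.1 files: lane 0 comes out through _mm_cvtsi128_si32, and lanes 1 to 3 through _mm_extract_epi16 at the even word positions 2, 4 and 6. The SSE4.1 _mm_extract_epi32 is not needed because every index here was already masked to the table size and shifted left by 2, so it is at most 63*4 = 252 for the 64-entry LUT and always fits in the low 16 bits of its 32-bit lane. A sketch of the idiom (hypothetical helper, not XNNPACK code):

    #include <emmintrin.h>
    #include <stdint.h>

    // SSE2-only read of four 32-bit indices, valid while each fits in 16 bits.
    static inline void extract_small_indices(__m128i vidx, uint32_t out[4]) {
      out[0] = (uint32_t) _mm_cvtsi128_si32(vidx);     // lane 0
      out[1] = (uint32_t) _mm_extract_epi16(vidx, 2);  // low word of lane 1
      out[2] = (uint32_t) _mm_extract_epi16(vidx, 4);  // low word of lane 2
      out[3] = (uint32_t) _mm_extract_epi16(vidx, 6);  // low word of lane 3
    }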
D | vsigmoid-neonfma-rr1-lut64-p2-div-x12.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut64_p2_div_x12():
     56  …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_…  (local)
     66  const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0);
     67  const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1);
D | vsigmoid-wasmsimd-rr2-lut64-p2-div-x12.c | in xnn_f32_vsigmoid_ukernel__wasmsimd_rr2_lut64_p2_div_x12():
     57  const v128_t vidx89AB = wasm_i32x4_shl(wasm_v128_and(vn89AB, vindex_mask), 2);  (local)
     73  const uint64_t vidx89 = wasm_i64x2_extract_lane(vidx89AB, 0);
     74  const uint64_t vidxAB = wasm_i64x2_extract_lane(vidx89AB, 1);
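The WAsm SIMD variant uses the same two-indices-per-64-bit-lane layout, extracted with wasm_i64x2_extract_lane and split apart in scalar code. Since line 57 shifts each index left by 2, the extracted values are already byte offsets. A sketch of how such offsets can feed a scalar gather (the table name is hypothetical, standing in for the kernel's LUT; not the generated source):

    #include <wasm_simd128.h>
    #include <stdint.h>

    extern const float table[64];  // placeholder for the kernel's 64-entry LUT

    // Gather four floats; vidx packs four byte offsets (index << 2).
    static inline v128_t lut_gather_wasm(v128_t vidx) {
      const uint64_t vidx01 = (uint64_t) wasm_i64x2_extract_lane(vidx, 0);
      const uint64_t vidx23 = (uint64_t) wasm_i64x2_extract_lane(vidx, 1);
      const uintptr_t base = (uintptr_t) table;
      return wasm_f32x4_make(
          *(const float*) (base + (uint32_t) vidx01),           // lane 0
          *(const float*) (base + (uint32_t) (vidx01 >> 32)),   // lane 1
          *(const float*) (base + (uint32_t) vidx23),           // lane 2
          *(const float*) (base + (uint32_t) (vidx23 >> 32)));  // lane 3
    }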
D | vsigmoid-neonfma-rr1-lut2048-p1-div-x12.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_div_x12():
     55  …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_…  (local)
     65  const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0);
     66  const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1);
D | vsigmoid-neonfma-rr1-lut2048-p1-nr2recps-x12.c | in xnn_f32_vsigmoid_ukernel__neonfma_rr1_lut2048_p1_nr2recps_x12():
     55  …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_…  (local)
     65  const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0);
     66  const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1);
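In the NEON variants the indices never go through a per-32-bit-lane extract: the masked int32x4_t is reinterpreted as uint64x2_t, each half leaves the vector with vgetq_lane_u64, and the two 32-bit indices inside each half are recovered with a truncating cast and a shift. (The hits truncate the definitions at vindex_…, so whether a byte-offset shift follows is not visible here.) A sketch of the split, as a hypothetical helper:

    #include <arm_neon.h>
    #include <stdint.h>

    // Recover four 32-bit LUT indices from a masked int32x4_t, two at a time.
    static inline void split_index_pairs(int32x4_t vmasked, uint32_t out[4]) {
      const uint64x2_t vidx = vreinterpretq_u64_s32(vmasked);
      const uint64_t vidx01 = vgetq_lane_u64(vidx, 0);
      const uint64_t vidx23 = vgetq_lane_u64(vidx, 1);
      out[0] = (uint32_t) vidx01;          // lane 0 (low half, little-endian lane order)
      out[1] = (uint32_t) (vidx01 >> 32);  // lane 1
      out[2] = (uint32_t) vidx23;          // lane 2
      out[3] = (uint32_t) (vidx23 >> 32);  // lane 3
    }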
/external/XNNPACK/src/f32-velu/gen/
D | velu-sse2-rr2-lut16-p3-x12.c | in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x12():
     62  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     84  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     85  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    116  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    117  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    118  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    119  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | velu-sse41-rr2-lut16-p3-x12.c | in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x12():
     62  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     80  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     81  const uint64_t vidxAB = (uint64_t) _mm_extract_epi64(vidx89AB, 1);
    106  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    107  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    108  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    109  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | velu-sse2-rr2-lut16-p3-x16.c | in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x16():
     65  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     89  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     90  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    130  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    131  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    132  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    133  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | velu-sse41-rr2-lut16-p3-x16.c | in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x16():
     65  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     85  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     86  const uint64_t vidxAB = (uint64_t) _mm_extract_epi64(vidx89AB, 1);
    118  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    119  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    120  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    121  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | velu-sse2-rr2-lut16-p3-x20.c | in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x20():
     68  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     94  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     95  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    144  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    145  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    146  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    147  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | velu-sse41-rr2-lut16-p3-x20.c | in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x20():
     68  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     90  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     91  const uint64_t vidxAB = (uint64_t) _mm_extract_epi64(vidx89AB, 1);
    130  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    131  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    132  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    133  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | velu-sse2-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__sse2_rr2_lut16_p3_x24():
     71  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     99  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
    100  const uint64_t vidxAB = (uint64_t) _mm_cvtsi128_si64(_mm_unpackhi_epi64(vidx89AB, vidx89AB));
    158  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    159  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    160  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    161  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
D | velu-sse41-rr2-lut16-p3-x24.c | in xnn_f32_velu_ukernel__sse41_rr2_lut16_p3_x24():
     71  … const __m128i vidx89AB = _mm_slli_epi32(_mm_and_si128(_mm_castps_si128(vn89AB), vindex_mask), 2);  (local)
     95  const uint64_t vidx89 = (uint64_t) _mm_cvtsi128_si64(vidx89AB);
     96  const uint64_t vidxAB = (uint64_t) _mm_extract_epi64(vidx89AB, 1);
    142  const uint32_t vidx8 = (uint32_t) _mm_cvtsi128_si32(vidx89AB);
    143  const uint32_t vidx9 = (uint32_t) _mm_extract_epi16(vidx89AB, 2);
    144  const uint32_t vidxA = (uint32_t) _mm_extract_epi16(vidx89AB, 4);
    145  const uint32_t vidxB = (uint32_t) _mm_extract_epi16(vidx89AB, 6);
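Every SSE velu entry builds its indices as (bits & vindex_mask) << 2, i.e. as byte offsets into the 16-entry LUT, so a scalar gather can add them straight to the table's base address with no further shift. The loads themselves fall outside these hits; a sketch of gathering four floats from the extracted 64-bit pairs and repacking them into a vector (illustrative only, table name made up):

    #include <emmintrin.h>
    #include <stdint.h>

    extern const float table[16];  // placeholder for the kernel's 16-entry LUT

    // vidx89/vidxAB each pack two byte offsets, as extracted above.
    static inline __m128 lut_gather_sse(uint64_t vidx89, uint64_t vidxAB) {
      const uintptr_t base = (uintptr_t) table;
      const __m128 v8 = _mm_load_ss((const float*) (base + (uint32_t) vidx89));
      const __m128 v9 = _mm_load_ss((const float*) (base + (uint32_t) (vidx89 >> 32)));
      const __m128 vA = _mm_load_ss((const float*) (base + (uint32_t) vidxAB));
      const __m128 vB = _mm_load_ss((const float*) (base + (uint32_t) (vidxAB >> 32)));
      // {v8, v9} and {vA, vB} interleaved, then joined: {v8, v9, vA, vB}.
      return _mm_movelh_ps(_mm_unpacklo_ps(v8, v9), _mm_unpacklo_ps(vA, vB));
    }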
D | velu-wasmsimd-arm-rr2-lut16-p3-x12.c | in xnn_f32_velu_ukernel__wasmsimd_arm_rr2_lut16_p3_x12():
     62  const v128_t vidx89AB = wasm_i32x4_shl(wasm_v128_and(vn89AB, vindex_mask), 2);  (local)
     79  const uint64_t vidx89 = wasm_i64x2_extract_lane(vidx89AB, 0);
     80  const uint64_t vidxAB = wasm_i64x2_extract_lane(vidx89AB, 1);
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/
D | neonfma-rr1-lut64-p2-x12-acc2.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x12_acc2():
     63  …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_…  (local)
     64  const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0);
     65  const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1);
D | neonfma-rr1-lut64-p2-x12-acc3.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x12_acc3():
     64  …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_…  (local)
     65  const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0);
     66  const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1);
D | neonfma-rr1-lut64-p2-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__neonfma_rr1_lut64_p2_x12():
     62  …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_…  (local)
     63  const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0);
     64  const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1);
D | neon-rr2-lut64-p2-x12.c | in xnn_f32_raddstoreexpminusmax_ukernel__neon_rr2_lut64_p2_x12():
     63  …const uint64x2_t vidx89AB = vreinterpretq_u64_s32(vandq_s32(vreinterpretq_s32_f32(vn89AB), vindex_…  (local)
     64  const uint64_t vidx89 = vgetq_lane_u64(vidx89AB, 0);
     65  const uint64_t vidxAB = vgetq_lane_u64(vidx89AB, 1);
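In these raddstoreexpminusmax kernels the extraction follows the definition immediately, and the packed pairs typically feed lane-wise table loads. Continuing the split shown earlier, a sketch of assembling a float32x4_t of LUT values from the two pairs (the table name is a placeholder for XNNPACK's 64-entry exp2 table; not the generated source):

    #include <arm_neon.h>
    #include <stdint.h>

    extern const float table[64];  // placeholder for the kernel's 64-entry exp2 LUT

    // Build {table[i0], table[i1], table[i2], table[i3]} from two packed pairs.
    static inline float32x4_t lut_gather_neon(uint64_t vidx01, uint64_t vidx23) {
      float32x2_t vl01 = vld1_dup_f32(&table[(uint32_t) vidx01]);        // lane 0
      vl01 = vld1_lane_f32(&table[(uint32_t) (vidx01 >> 32)], vl01, 1);  // lane 1
      float32x2_t vl23 = vld1_dup_f32(&table[(uint32_t) vidx23]);        // lane 2
      vl23 = vld1_lane_f32(&table[(uint32_t) (vidx23 >> 32)], vl23, 1);  // lane 3
      return vcombine_f32(vl01, vl23);
    }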