Home
last modified time | relevance | path

Searched refs:sse2 (Results 1 – 25 of 1735) sorted by relevance

12345678910>>...70

/external/tensorflow/third_party/jpeg/
DBUILD.bazel186 "simd/x86_64/jccolor-sse2.o",
188 "simd/x86_64/jcgray-sse2.o",
189 "simd/x86_64/jchuff-sse2.o",
190 "simd/x86_64/jcphuff-sse2.o",
192 "simd/x86_64/jcsample-sse2.o",
194 "simd/x86_64/jdcolor-sse2.o",
196 "simd/x86_64/jdmerge-sse2.o",
198 "simd/x86_64/jdsample-sse2.o",
200 "simd/x86_64/jfdctfst-sse2.o",
202 "simd/x86_64/jfdctint-sse2.o",
[all …]
/external/rust/crates/ppv-lite86/src/x86_64/
Dmod.rs6 mod sse2; module
39 sse2::u128x1_sse2<S3, S4, NI>: Swap64,
40 sse2::u64x2_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u64; 2]> + Vec2<u64>,
41 sse2::u32x4_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u32; 4]> + Vec4<u32>,
42 sse2::u64x4_sse2<S3, S4, NI>: BSwap + Words4,
43 sse2::u128x1_sse2<S3, S4, NI>: BSwap,
44 sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x2x2_sse2<S3, S4, NI>>,
45 sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x4_sse2<S3, S4, NI>>,
46 sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u32x4x2_sse2<S3, S4, NI>>,
47 sse2::u128x4_sse2<S3, S4, NI>: Into<sse2::u64x2x4_sse2<S3, S4, NI>>,
[all …]
/external/XNNPACK/src/xnnpack/
Dparams-init.h92 params.sse2.kernel_zero_point[i] = (int16_t) (uint16_t) kernel_zero_point; in xnn_init_qu8_gemm_params()
94 params.sse2.multiplier[0] = multiplier; in xnn_init_qu8_gemm_params()
95 params.sse2.multiplier[1] = multiplier; in xnn_init_qu8_gemm_params()
96 params.sse2.multiplier[2] = multiplier; in xnn_init_qu8_gemm_params()
97 params.sse2.multiplier[3] = multiplier; in xnn_init_qu8_gemm_params()
98 params.sse2.rounding[0] = UINT64_C(0x40000000); in xnn_init_qu8_gemm_params()
99 params.sse2.rounding[1] = UINT64_C(0x40000000); in xnn_init_qu8_gemm_params()
100 params.sse2.remainder_mask[0] = (int32_t) remainder_mask; in xnn_init_qu8_gemm_params()
101 params.sse2.remainder_mask[1] = (int32_t) remainder_mask; in xnn_init_qu8_gemm_params()
102 params.sse2.remainder_mask[2] = (int32_t) remainder_mask; in xnn_init_qu8_gemm_params()
[all …]
/external/llvm/test/CodeGen/X86/
Dsse2-intrinsics-x86-upgrade.ll2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
9 %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
12 declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
20 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
23 declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
30 %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
33 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
41 %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
44 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
52 %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
[all …]
Dsse2-intrinsics-x86.ll2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
15 …%res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>…
18 declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
31 …%res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x d…
34 declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
47 …%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x d…
50 declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
71 %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
74 declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
91 %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
[all …]
Dpic-load-remat.ll1 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
8 …%tmp4403 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> zeroinitializer, <8 x i16> zeroin…
9 …%tmp4443 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> zeroinitializer, <8 x i16> zeroin…
10 …%tmp4609 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast…
12 …%tmp4658 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4651, <8 x i16> bitcast (<4 x …
13 …%tmp4669 = tail call <8 x i16> @llvm.x86.sse2.pavg.w( <8 x i16> < i16 -23170, i16 -23170, i16 -231…
14 …%tmp4679 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4669, <8 x i16> %tmp4669 ) no…
16 …%tmp4700 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4689, <8 x i16> zeroinitializ…
19 …%tmp4779 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4772, <8 x i16> bitcast (<4 x …
21 …%tmp4821 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4810, <8 x i16> zeroinitializ…
[all …]
Dvec_shift5.ll2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
19 …%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 …
33 …%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i1…
47 …%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i1…
61 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
75 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
89 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
104 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
119 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
[all …]
/external/llvm-project/clang/include/clang/Basic/
DX86Target.def78 CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2")
79 CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2")
80 CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3")
81 CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3")
82 CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1")
83 CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe")
84 CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
85 CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
86 CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt…
87 CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+…
[all …]
/external/libvpx/libvpx/vpx_dsp/x86/
Dhighbd_variance_sse2.c261 DECLS(sse2);
277 unsigned int sse2; \
280 &sse2, NULL, NULL); \
282 sse += sse2; \
286 &sse2, NULL, NULL); \
288 sse += sse2; \
291 &sse2, NULL, NULL); \
293 sse += sse2; \
311 uint32_t sse2; \
314 &sse2, NULL, NULL); \
[all …]
/external/libjpeg-turbo/
DBUILD.gn39 "simd/i386/jccolor-sse2.asm",
42 "simd/i386/jcgray-sse2.asm",
43 "simd/i386/jchuff-sse2.asm",
44 "simd/i386/jcphuff-sse2.asm",
47 "simd/i386/jcsample-sse2.asm",
50 "simd/i386/jdcolor-sse2.asm",
53 "simd/i386/jdmerge-sse2.asm",
56 "simd/i386/jdsample-sse2.asm",
60 "simd/i386/jfdctfst-sse2.asm",
63 "simd/i386/jfdctint-sse2.asm",
[all …]
DAndroid.bp161 "simd/i386/jccolor-sse2.asm",
164 "simd/i386/jcgray-sse2.asm",
165 "simd/i386/jchuff-sse2.asm",
166 "simd/i386/jcphuff-sse2.asm",
169 "simd/i386/jcsample-sse2.asm",
172 "simd/i386/jdcolor-sse2.asm",
175 "simd/i386/jdmerge-sse2.asm",
178 "simd/i386/jdsample-sse2.asm",
182 "simd/i386/jfdctfst-sse2.asm",
185 "simd/i386/jfdctint-sse2.asm",
[all …]
/external/llvm-project/llvm/test/Transforms/InstCombine/X86/
Dx86-sse2.ll12 %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
23 %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
37 %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
50 %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
61 %2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %1, <2 x double> %1)
75 %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
88 %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
99 %2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %1, <2 x double> %1)
113 %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
126 %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
[all …]
/external/llvm-project/llvm/test/CodeGen/X86/
Dvec_shift5.ll2 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
14 …%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 …
23 …%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i1…
32 …%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i1…
41 %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
50 %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
59 %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
73 %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
87 %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
[all …]
Dfptosi-constant.ll11 %r = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 5.0e+09, double undef>)
20 %r = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 5.0e+09, double undef>)
29 %r = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 5.0e+19, double undef>)
38 %r = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 5.0e+19, double undef>)
47 %r = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> <double 5.0e+09, double 5.0e+09>)
56 %r = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> <double 5.0e+09, double 5.0e+09>)
103 …%r = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> <float 5.0e+09, float 5.0e+09, float 5.0e+…
112 …%r = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> <float 5.0e+09, float 5.0e+09, float 5.0e…
116 declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
117 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
[all …]
/external/llvm/test/Transforms/InstCombine/
Dx86-sse2.ll8 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP1]])
14 %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
25 %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
32 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x …
36 %2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %1)
49 %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
62 %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
69 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x …
73 %2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %1)
86 %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
[all …]
/external/XNNPACK/src/qu8-vadd/
Dminmax-sse2.c23 …const __m128i vzero_point_product = _mm_load_si128((const __m128i*) &params->sse2.zero_point_produ… in xnn_qu8_vadd_minmax_ukernel__sse2()
24 const __m128i va_multiplier_lo = _mm_load_si128((const __m128i*) &params->sse2.a_multiplier_lo); in xnn_qu8_vadd_minmax_ukernel__sse2()
25 const __m128i va_multiplier_hi = _mm_load_si128((const __m128i*) &params->sse2.a_multiplier_hi); in xnn_qu8_vadd_minmax_ukernel__sse2()
26 const __m128i vb_multiplier_lo = _mm_load_si128((const __m128i*) &params->sse2.b_multiplier_lo); in xnn_qu8_vadd_minmax_ukernel__sse2()
27 const __m128i vb_multiplier_hi = _mm_load_si128((const __m128i*) &params->sse2.b_multiplier_hi); in xnn_qu8_vadd_minmax_ukernel__sse2()
28 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qu8_vadd_minmax_ukernel__sse2()
29 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qu8_vadd_minmax_ukernel__sse2()
30 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qu8_vadd_minmax_ukernel__sse2()
68 const __m128i vy_zero_point = _mm_load_si128((const __m128i*) params->sse2.y_zero_point); in xnn_qu8_vadd_minmax_ukernel__sse2()
71 vy = _mm_max_epu8(vy, _mm_load_si128((const __m128i*) params->sse2.y_min)); in xnn_qu8_vadd_minmax_ukernel__sse2()
[all …]
/external/libaom/libaom/aom_dsp/x86/
Dhighbd_variance_sse2.c281 DECLS(sse2);
294 unsigned int sse2; \
300 src, src_stride, x_offset, y_offset, dst, dst_stride, h, &sse2, \
303 sse += sse2; \
307 &sse2, NULL, NULL); \
309 sse += sse2; \
313 dst_stride, h, &sse2, NULL, NULL); \
315 sse += sse2; \
318 dst_stride, h, &sse2, NULL, NULL); \
320 sse += sse2; \
[all …]
/external/XNNPACK/src/qs8-vaddc/gen/
Dminmax-avx2-mul32-ld64-x8.c25 …ltiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.x_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
26 …r_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_mask)); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
27 …old = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
28 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
29 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
30 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
31 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
34 _mm_broadcastd_epi32(_mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y)), in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
35 _mm_load_si128((const __m128i*) params->sse2.zero_point_product))); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
Dminmax-sse41-mul16-ld64-x8.c24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
25 const __m128i vx_multiplier_hi = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_hi); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
26 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
27 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
28 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
29 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
30 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
31 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
33 …__m128i vzero_point_product = _mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
35 …mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->sse2.zero_point_product)… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
Dminmax-sse2-mul16-ld64-x8.c24 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
25 const __m128i vx_multiplier_hi = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_hi); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
26 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
27 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
28 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
29 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
30 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
31 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
33 …__m128i vzero_point_product = _mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y); in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
35 …mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->sse2.zero_point_product)… in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
Dminmax-sse41-mul32-ld32-x8.c25 const __m128i vx_multiplier = _mm_load_si128((const __m128i*) params->sse2.x_multiplier); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
26 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
27 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
28 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
29 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
30 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
31 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
33 …__m128i vzero_point_product = _mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y); in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
35 …mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->sse2.zero_point_product)… in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
Dminmax-xop-mul32-ld32-x8.c30 const __m128i vx_multiplier = _mm_load_si128((const __m128i*) params->sse2.x_multiplier); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
31 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
32 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
33 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
34 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
35 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
36 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
38 …__m128i vzero_point_product = _mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y); in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
40 …mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->sse2.zero_point_product)… in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
Dminmax-avx2-mul32-ld64-x16.c25 …ltiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.x_multiplier)); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
26 …r_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_mask)); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
27 …old = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold… in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
28 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
29 …int = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_zero_point)); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
30 …output_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_min)); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
31 …output_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_max)); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
34 _mm_broadcastd_epi32(_mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y)), in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
35 _mm_load_si128((const __m128i*) params->sse2.zero_point_product))); in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-avx2-mul32-ld64-x8.c25 …uct = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.zero_point_product)… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
26 …ltiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.x_multiplier)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
27 …ltiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.y_multiplier)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
28 …r_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_mask)); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
29 …old = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold… in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
30 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
31 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
32 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
33 const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max); in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
Dminmax-sse41-mul16-ld64-x8.c24 …const __m128i vzero_point_product = _mm_load_si128((const __m128i*) params->sse2.zero_point_produc… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
25 const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
26 const __m128i vx_multiplier_hi = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_hi); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
27 const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
28 const __m128i vy_multiplier_hi = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_hi); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
29 const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
30 …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh… in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
31 const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
32 …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
33 const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
[all …]

12345678910>>...70