/external/tensorflow/third_party/jpeg/ |
D | BUILD.bazel |
    186  "simd/x86_64/jccolor-sse2.o",
    188  "simd/x86_64/jcgray-sse2.o",
    189  "simd/x86_64/jchuff-sse2.o",
    190  "simd/x86_64/jcphuff-sse2.o",
    192  "simd/x86_64/jcsample-sse2.o",
    194  "simd/x86_64/jdcolor-sse2.o",
    196  "simd/x86_64/jdmerge-sse2.o",
    198  "simd/x86_64/jdsample-sse2.o",
    200  "simd/x86_64/jfdctfst-sse2.o",
    202  "simd/x86_64/jfdctint-sse2.o",
    [all …]
|
/external/rust/crates/ppv-lite86/src/x86_64/ |
D | mod.rs |
    6   mod sse2;  module
    39  sse2::u128x1_sse2<S3, S4, NI>: Swap64,
    40  sse2::u64x2_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u64; 2]> + Vec2<u64>,
    41  sse2::u32x4_sse2<S3, S4, NI>: BSwap + RotateEachWord32 + MultiLane<[u32; 4]> + Vec4<u32>,
    42  sse2::u64x4_sse2<S3, S4, NI>: BSwap + Words4,
    43  sse2::u128x1_sse2<S3, S4, NI>: BSwap,
    44  sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x2x2_sse2<S3, S4, NI>>,
    45  sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u64x4_sse2<S3, S4, NI>>,
    46  sse2::u128x2_sse2<S3, S4, NI>: Into<sse2::u32x4x2_sse2<S3, S4, NI>>,
    47  sse2::u128x4_sse2<S3, S4, NI>: Into<sse2::u64x2x4_sse2<S3, S4, NI>>,
    [all …]
|
/external/XNNPACK/src/xnnpack/ |
D | params-init.h |
    92   params.sse2.kernel_zero_point[i] = (int16_t) (uint16_t) kernel_zero_point;  in xnn_init_qu8_gemm_params()
    94   params.sse2.multiplier[0] = multiplier;  in xnn_init_qu8_gemm_params()
    95   params.sse2.multiplier[1] = multiplier;  in xnn_init_qu8_gemm_params()
    96   params.sse2.multiplier[2] = multiplier;  in xnn_init_qu8_gemm_params()
    97   params.sse2.multiplier[3] = multiplier;  in xnn_init_qu8_gemm_params()
    98   params.sse2.rounding[0] = UINT64_C(0x40000000);  in xnn_init_qu8_gemm_params()
    99   params.sse2.rounding[1] = UINT64_C(0x40000000);  in xnn_init_qu8_gemm_params()
    100  params.sse2.remainder_mask[0] = (int32_t) remainder_mask;  in xnn_init_qu8_gemm_params()
    101  params.sse2.remainder_mask[1] = (int32_t) remainder_mask;  in xnn_init_qu8_gemm_params()
    102  params.sse2.remainder_mask[2] = (int32_t) remainder_mask;  in xnn_init_qu8_gemm_params()
    [all …]
|
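The params-init excerpt shows the XNNPACK convention of storing every quantization constant pre-broadcast: each scalar is copied into all lanes of a 16-byte-aligned field so the SSE2 kernels can fetch it with a single _mm_load_si128. A minimal sketch of that layout in C, using a trimmed, hypothetical struct (field names follow the excerpt, not the full XNNPACK definition):

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical, trimmed-down params block: every constant is stored
     * pre-broadcast and 16-byte aligned so SSE2 code can load it with one
     * _mm_load_si128 instead of broadcasting at run time. */
    struct qu8_gemm_sse2_params {
      int16_t kernel_zero_point[8];
      int32_t multiplier[4];
      uint64_t rounding[2];
      int32_t remainder_mask[4];
    } __attribute__((aligned(16)));

    static void init_params(struct qu8_gemm_sse2_params* params,
                            uint8_t kernel_zero_point,
                            int32_t multiplier, int32_t remainder_mask) {
      for (int i = 0; i < 8; i++) {
        params->kernel_zero_point[i] = (int16_t) (uint16_t) kernel_zero_point;
      }
      for (int i = 0; i < 4; i++) {
        params->multiplier[i] = multiplier;
        params->remainder_mask[i] = remainder_mask;
      }
      params->rounding[0] = UINT64_C(0x40000000);
      params->rounding[1] = UINT64_C(0x40000000);
    }

    int main(void) {
      struct qu8_gemm_sse2_params p;
      init_params(&p, 128, 0x40000000, 0xFF);
      printf("%d %08x\n", p.kernel_zero_point[5], (unsigned) p.multiplier[2]);
      return 0;
    }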
/external/llvm/test/CodeGen/X86/ |
D | sse2-intrinsics-x86-upgrade.ll |
    2   ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s
    9   %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
    12  declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
    20  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
    23  declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone
    30  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
    33  declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
    41  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
    44  declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
    52  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
    [all …]
|
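This upgrade test covers the retired whole-register shift intrinsics: llvm.x86.sse2.psll.dq.bs took its count in bytes, while the plain llvm.x86.sse2.psll.dq form counted bits (hence the i32 8 above for a one-byte shift); the auto-upgrader now rewrites both to shufflevector. At the C level the same PSLLDQ/PSRLDQ operations are exposed as _mm_slli_si128/_mm_srli_si128, whose immediate is a byte count. A small standalone check:

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
      /* Byte i of a holds the value i. */
      __m128i a = _mm_set_epi8(15, 14, 13, 12, 11, 10, 9, 8,
                               7, 6, 5, 4, 3, 2, 1, 0);
      __m128i l = _mm_slli_si128(a, 7);  /* PSLLDQ: shift left 7 bytes */
      __m128i r = _mm_srli_si128(a, 7);  /* PSRLDQ: shift right 7 bytes */
      unsigned char lb[16], rb[16];
      _mm_storeu_si128((__m128i*) lb, l);
      _mm_storeu_si128((__m128i*) rb, r);
      printf("lb[15]=%u rb[0]=%u\n", lb[15], rb[0]);  /* lb[15]=8, rb[0]=7 */
      return 0;
    }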
D | sse2-intrinsics-x86.ll |
    2   ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE
    15  …%res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>…
    18  declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone
    31  …%res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x d…
    34  declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounwind readnone
    47  …%res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x d…
    50  declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone
    71  %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    74  declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone
    91  %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; <i32> [#uses=1]
    [all …]
|
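Among the intrinsics pinned down here are the COMISD comparisons, which differ from the CMPSD family in result type: comieq/comige return an i32 that is 0 or 1, while cmp.sd produces an all-ones or all-zeros mask in the low lane. The contrast in C, assuming <emmintrin.h>:

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
      __m128d a = _mm_set_sd(2.0);  /* low lane 2.0, high lane 0.0 */
      __m128d b = _mm_set_sd(1.0);  /* low lane 1.0 */
      printf("comige: %d\n", _mm_comige_sd(a, b));  /* 1 */
      printf("comieq: %d\n", _mm_comieq_sd(a, b));  /* 0 */
      /* cmpge yields a mask: all-ones in the low lane since 2.0 >= 1.0. */
      unsigned long long lo;
      _mm_storel_epi64((__m128i*) &lo, _mm_castpd_si128(_mm_cmpge_sd(a, b)));
      printf("mask: %llx\n", lo);  /* ffffffffffffffff */
      return 0;
    }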
D | pic-load-remat.ll |
    1   ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb
    8   …%tmp4403 = tail call <8 x i16> @llvm.x86.sse2.psubs.w( <8 x i16> zeroinitializer, <8 x i16> zeroin…
    9   …%tmp4443 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> zeroinitializer, <8 x i16> zeroin…
    10  …%tmp4609 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> zeroinitializer, <8 x i16> bitcast…
    12  …%tmp4658 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4651, <8 x i16> bitcast (<4 x …
    13  …%tmp4669 = tail call <8 x i16> @llvm.x86.sse2.pavg.w( <8 x i16> < i16 -23170, i16 -23170, i16 -231…
    14  …%tmp4679 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4669, <8 x i16> %tmp4669 ) no…
    16  …%tmp4700 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4689, <8 x i16> zeroinitializ…
    19  …%tmp4779 = tail call <8 x i16> @llvm.x86.sse2.psll.w( <8 x i16> %tmp4772, <8 x i16> bitcast (<4 x …
    21  …%tmp4821 = tail call <8 x i16> @llvm.x86.sse2.padds.w( <8 x i16> %tmp4810, <8 x i16> zeroinitializ…
    [all …]
|
D | vec_shift5.ll |
    2    ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X32
    3    ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64
    19   …%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 …
    33   …%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i1…
    47   …%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i1…
    61   %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
    75   %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
    89   %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
    104  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
    119  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
    [all …]
|
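vec_shift5.ll checks that immediate-count vector shifts of constant vectors are folded at compile time. The C equivalent, assuming GCC or Clang with SSE2 and optimization enabled: with a constant input the compiler typically emits the folded constant rather than a PSLLW.

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
      /* Lanes, low to high: 4, 8, 1, 2, 4, 8, 1, 2 (set_epi16 is high-to-low). */
      const __m128i v = _mm_set_epi16(2, 1, 8, 4, 2, 1, 8, 4);
      /* Shift each 16-bit lane left by 3; with a constant input this folds
       * to the constant {32, 64, 8, 16, 32, 64, 8, 16}. */
      __m128i s = _mm_slli_epi16(v, 3);
      short out[8];
      _mm_storeu_si128((__m128i*) out, s);
      printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  /* 32 64 8 16 */
      return 0;
    }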
/external/llvm-project/clang/include/clang/Basic/ |
D | X86Target.def |
    78  CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2")
    79  CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2")
    80  CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3")
    81  CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3")
    82  CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1")
    83  CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe")
    84  CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
    85  CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
    86  CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt…
    87  CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+…
    [all …]
|
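X86Target.def is an X-macro table: each CPU_SPECIFIC row carries a cpu_dispatch/cpu_specific target name, its one-character mangling code, and a feature string, and the including file decides what the rows expand to by defining the macro first. A toy consumer, assuming the three-argument shape visible in the excerpt (the real include path and surrounding macros are Clang's, simplified here):

    #include <stdio.h>

    /* Expand each table row into a printf; Clang instead expands the same
     * rows into feature lookups and mangling-code switches. */
    #define CPU_SPECIFIC(NAME, MANGLING, FEATURES) \
      printf("%-12s %c %s\n", NAME, MANGLING, FEATURES);

    int main(void) {
      /* Two stand-in rows; the real list would come from
       *   #include "clang/Basic/X86Target.def"  */
      CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2")
      CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe")
      return 0;
    }
    #undef CPU_SPECIFIC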
/external/libvpx/libvpx/vpx_dsp/x86/ |
D | highbd_variance_sse2.c |
    261  DECLS(sse2);
    277  unsigned int sse2; \
    280  &sse2, NULL, NULL); \
    282  sse += sse2; \
    286  &sse2, NULL, NULL); \
    288  sse += sse2; \
    291  &sse2, NULL, NULL); \
    293  sse += sse2; \
    311  uint32_t sse2; \
    314  &sse2, NULL, NULL); \
    [all …]
|
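Note that `sse2` in this macro is a local accumulator for one strip's partial sum of squared errors, not the instruction set: wide blocks are processed in 16-column strips, each strip's SSE is written through `&sse2`, and the totals are summed with `sse += sse2`. The shape of that accumulation as a plain-C sketch, where strip16_sse is a hypothetical stand-in for the SSE2 sub-pixel kernel:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical strip kernel: sum of squared errors of one 16-column
     * strip of a high-bit-depth block, written through *sse. */
    static void strip16_sse(const uint16_t *src, int src_stride,
                            const uint16_t *dst, int dst_stride,
                            int h, uint32_t *sse) {
      uint32_t s = 0;
      for (int i = 0; i < h; i++)
        for (int j = 0; j < 16; j++) {
          int d = (int) src[i * src_stride + j] - (int) dst[i * dst_stride + j];
          s += (uint32_t) (d * d);
        }
      *sse = s;
    }

    /* Cover a w x h block strip by strip; each strip's partial result
     * lands in a local sse2 and is accumulated, as in "sse += sse2;". */
    static uint32_t block_sse(const uint16_t *src, int src_stride,
                              const uint16_t *dst, int dst_stride,
                              int w, int h) {
      uint32_t sse = 0;
      for (int x = 0; x < w; x += 16) {
        uint32_t sse2;
        strip16_sse(src + x, src_stride, dst + x, dst_stride, h, &sse2);
        sse += sse2;
      }
      return sse;
    }

    int main(void) {
      uint16_t src[4 * 32], dst[4 * 32];
      for (int i = 0; i < 4 * 32; i++) { src[i] = (uint16_t) i; dst[i] = (uint16_t) (i + 2); }
      printf("sse = %u\n", block_sse(src, 32, dst, 32, 32, 4));  /* 128 diffs of 2 -> 512 */
      return 0;
    }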
/external/libjpeg-turbo/ |
D | BUILD.gn |
    39  "simd/i386/jccolor-sse2.asm",
    42  "simd/i386/jcgray-sse2.asm",
    43  "simd/i386/jchuff-sse2.asm",
    44  "simd/i386/jcphuff-sse2.asm",
    47  "simd/i386/jcsample-sse2.asm",
    50  "simd/i386/jdcolor-sse2.asm",
    53  "simd/i386/jdmerge-sse2.asm",
    56  "simd/i386/jdsample-sse2.asm",
    60  "simd/i386/jfdctfst-sse2.asm",
    63  "simd/i386/jfdctint-sse2.asm",
    [all …]
|
D | Android.bp |
    161  "simd/i386/jccolor-sse2.asm",
    164  "simd/i386/jcgray-sse2.asm",
    165  "simd/i386/jchuff-sse2.asm",
    166  "simd/i386/jcphuff-sse2.asm",
    169  "simd/i386/jcsample-sse2.asm",
    172  "simd/i386/jdcolor-sse2.asm",
    175  "simd/i386/jdmerge-sse2.asm",
    178  "simd/i386/jdsample-sse2.asm",
    182  "simd/i386/jfdctfst-sse2.asm",
    185  "simd/i386/jfdctint-sse2.asm",
    [all …]
|
/external/llvm-project/llvm/test/Transforms/InstCombine/X86/ |
D | x86-sse2.ll |
    12   %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
    23   %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
    37   %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
    50   %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
    61   %2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %1, <2 x double> %1)
    75   %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
    88   %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
    99   %2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %1, <2 x double> %1)
    113  %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
    126  %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
    [all …]
|
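InstCombine can simplify these scalar-SD calls because of their lane semantics: the operation happens only in the low lane and the high lane of the result is copied from the first operand, so any shuffle that only feeds the second operand's high lane is dead. Demonstrated in C:

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
      __m128d a = _mm_set_pd(100.0, 1.0);  /* high = 100, low = 1 */
      __m128d b = _mm_set_pd(900.0, 2.0);  /* high = 900, low = 2 */
      /* ADDSD: low = 1 + 2; high copied from a; b's high lane is never read. */
      double out[2];
      _mm_storeu_pd(out, _mm_add_sd(a, b));
      printf("low=%g high=%g\n", out[0], out[1]);  /* low=3 high=100 */
      return 0;
    }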
/external/llvm-project/llvm/test/CodeGen/X86/ |
D | vec_shift5.ll |
    2   ; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
    3   ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64
    14  …%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> <i16 1, i16 2, i16 4, i16 8, i16 1, i16 …
    23  …%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i1…
    32  …%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 4, i16 8, i16 16, i16 32, i16 4, i1…
    41  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 1, i32 2, i32 4, i32 8>, i32 3)
    50  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
    59  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 4, i32 8, i32 16, i32 32>, i32 3)
    73  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 1, i64 2>, i32 3)
    87  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 8, i64 16>, i32 3)
    [all …]
|
D | fptosi-constant.ll |
    11   %r = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 5.0e+09, double undef>)
    20   %r = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 5.0e+09, double undef>)
    29   %r = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 5.0e+19, double undef>)
    38   %r = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 5.0e+19, double undef>)
    47   %r = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> <double 5.0e+09, double 5.0e+09>)
    56   %r = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> <double 5.0e+09, double 5.0e+09>)
    103  …%r = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> <float 5.0e+09, float 5.0e+09, float 5.0e+…
    112  …%r = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> <float 5.0e+09, float 5.0e+09, float 5.0e…
    116  declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
    117  declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
    [all …]
|
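These tests pin down the x86-specific result for out-of-range conversions: CVTSD2SI/CVTTSD2SI and the packed variants return the "integer indefinite" value (0x80000000, i.e. INT32_MIN, or its 64-bit analogue) instead of exhibiting the undefined behavior a plain C cast would have. Observable with intrinsics:

    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
      __m128d big = _mm_set_sd(5.0e9);  /* exceeds INT32_MAX */
      /* (int) 5.0e9 would be UB in C; the intrinsic has a defined result. */
      printf("cvtsd2si:  %d\n", _mm_cvtsd_si32(big));   /* -2147483648 */
      printf("cvttsd2si: %d\n", _mm_cvttsd_si32(big));  /* -2147483648 */
      return 0;
    }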
/external/llvm/test/Transforms/InstCombine/ |
D | x86-sse2.ll |
    8   ; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP1]])
    14  %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
    25  %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
    32  ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x …
    36  %2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %1)
    49  %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
    62  %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
    69  ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x …
    73  %2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %1)
    86  %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
    [all …]
|
/external/XNNPACK/src/qu8-vadd/ |
D | minmax-sse2.c |
    23  …const __m128i vzero_point_product = _mm_load_si128((const __m128i*) &params->sse2.zero_point_produ…  in xnn_qu8_vadd_minmax_ukernel__sse2()
    24  const __m128i va_multiplier_lo = _mm_load_si128((const __m128i*) &params->sse2.a_multiplier_lo);  in xnn_qu8_vadd_minmax_ukernel__sse2()
    25  const __m128i va_multiplier_hi = _mm_load_si128((const __m128i*) &params->sse2.a_multiplier_hi);  in xnn_qu8_vadd_minmax_ukernel__sse2()
    26  const __m128i vb_multiplier_lo = _mm_load_si128((const __m128i*) &params->sse2.b_multiplier_lo);  in xnn_qu8_vadd_minmax_ukernel__sse2()
    27  const __m128i vb_multiplier_hi = _mm_load_si128((const __m128i*) &params->sse2.b_multiplier_hi);  in xnn_qu8_vadd_minmax_ukernel__sse2()
    28  const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);  in xnn_qu8_vadd_minmax_ukernel__sse2()
    29  …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh…  in xnn_qu8_vadd_minmax_ukernel__sse2()
    30  const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);  in xnn_qu8_vadd_minmax_ukernel__sse2()
    68  const __m128i vy_zero_point = _mm_load_si128((const __m128i*) params->sse2.y_zero_point);  in xnn_qu8_vadd_minmax_ukernel__sse2()
    71  vy = _mm_max_epu8(vy, _mm_load_si128((const __m128i*) params->sse2.y_min));  in xnn_qu8_vadd_minmax_ukernel__sse2()
    [all …]
|
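After loading these pre-broadcast constants, the kernel requantizes each 32-bit accumulator: scale by the fixed-point multipliers, round via the remainder mask and threshold, arithmetic-shift, add the output zero point, and clamp. A scalar sketch of one element; the helper name is hypothetical, and the exact tie-breaking (round half away from zero via the `acc < 0` correction) is inferred from the field names and XNNPACK's scalar kernels, so treat it as an assumption:

    #include <stdint.h>
    #include <stdio.h>

    static uint8_t qu8_add_requantize(
        uint8_t a, uint8_t b,
        int32_t zero_point_product, int32_t a_multiplier, int32_t b_multiplier,
        int32_t remainder_mask, int32_t remainder_threshold, uint32_t shift,
        int32_t y_zero_point, uint8_t y_min, uint8_t y_max) {
      const int32_t acc = zero_point_product
          + (int32_t) a * a_multiplier + (int32_t) b * b_multiplier;
      /* Round to nearest, ties away from zero (assumed). Also assumes >> on
       * a negative int32_t is an arithmetic shift, as on mainstream compilers. */
      const int32_t rem = (acc & remainder_mask) - (int32_t) (acc < 0);
      int32_t y = (acc >> shift) + (int32_t) (rem > remainder_threshold);
      y += y_zero_point;
      if (y < (int32_t) y_min) y = y_min;
      if (y > (int32_t) y_max) y = y_max;
      return (uint8_t) y;
    }

    int main(void) {
      /* Toy constants: multipliers 1<<14 with shift 15 compute (a + b) / 2. */
      printf("%u\n", qu8_add_requantize(10, 20, 0, 1 << 14, 1 << 14,
                                        (1 << 15) - 1, ((1 << 15) - 1) >> 1, 15,
                                        0, 0, 255));  /* 15 */
      return 0;
    }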
/external/libaom/libaom/aom_dsp/x86/ |
D | highbd_variance_sse2.c |
    281  DECLS(sse2);
    294  unsigned int sse2; \
    300  src, src_stride, x_offset, y_offset, dst, dst_stride, h, &sse2, \
    303  sse += sse2; \
    307  &sse2, NULL, NULL); \
    309  sse += sse2; \
    313  dst_stride, h, &sse2, NULL, NULL); \
    315  sse += sse2; \
    318  dst_stride, h, &sse2, NULL, NULL); \
    320  sse += sse2; \
    [all …]
|
/external/XNNPACK/src/qs8-vaddc/gen/ |
D | minmax-avx2-mul32-ld64-x8.c |
    25  …ltiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.x_multiplier));  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
    26  …r_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
    27  …old = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
    28  const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
    29  …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
    30  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
    31  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
    34  _mm_broadcastd_epi32(_mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y)),  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
    35  _mm_load_si128((const __m128i*) params->sse2.zero_point_product)));  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x8()
|
D | minmax-sse41-mul16-ld64-x8.c |
    24  const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
    25  const __m128i vx_multiplier_hi = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_hi);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
    26  const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
    27  …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh…  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
    28  const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
    29  …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
    30  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
    31  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
    33  …__m128i vzero_point_product = _mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
    35  …mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->sse2.zero_point_product)…  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul16_ld64_x8()
|
D | minmax-sse2-mul16-ld64-x8.c |
    24  const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo);  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
    25  const __m128i vx_multiplier_hi = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_hi);  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
    26  const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
    27  …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh…  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
    28  const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
    29  …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
    30  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
    31  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
    33  …__m128i vzero_point_product = _mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y);  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
    35  …mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->sse2.zero_point_product)…  in xnn_qs8_vaddc_minmax_ukernel__sse2_mul16_ld64_x8()
|
D | minmax-sse41-mul32-ld32-x8.c |
    25  const __m128i vx_multiplier = _mm_load_si128((const __m128i*) params->sse2.x_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
    26  const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
    27  …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh…  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
    28  const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
    29  …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
    30  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
    31  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
    33  …__m128i vzero_point_product = _mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y);  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
    35  …mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->sse2.zero_point_product)…  in xnn_qs8_vaddc_minmax_ukernel__sse41_mul32_ld32_x8()
|
D | minmax-xop-mul32-ld32-x8.c |
    30  const __m128i vx_multiplier = _mm_load_si128((const __m128i*) params->sse2.x_multiplier);  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
    31  const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
    32  …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh…  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
    33  const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
    34  …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
    35  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
    36  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
    38  …__m128i vzero_point_product = _mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y);  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
    40  …mm_add_epi32(vzero_point_product, _mm_load_si128((const __m128i*) params->sse2.zero_point_product)…  in xnn_qs8_vaddc_minmax_ukernel__xop_mul32_ld32_x8()
|
D | minmax-avx2-mul32-ld64-x16.c |
    25  …ltiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.x_multiplier));  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
    26  …r_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
    27  …old = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold…  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
    28  const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
    29  …int = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_zero_point));  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
    30  …output_min = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_min));  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
    31  …output_max = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.output_max));  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
    34  _mm_broadcastd_epi32(_mm_cvtsi32_si128(params->sse2.y_multiplier[0] * (int32_t) *input_y)),  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
    35  _mm_load_si128((const __m128i*) params->sse2.zero_point_product)));  in xnn_qs8_vaddc_minmax_ukernel__avx2_mul32_ld64_x16()
|
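The vaddc kernels above add a broadcast scalar, so the constant operand's whole contribution is folded outside the loop: zero_point_product + y_multiplier[0] * (*input_y) is computed once in scalar code and broadcast, leaving one multiply per lane in the hot loop. The fold in isolation, with a hypothetical helper whose names mirror the excerpts (an illustration, not the XNNPACK source):

    #include <emmintrin.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Fold the constant input into the bias once, before the element loop.
     * Assumes zero_point_product is stored 16-byte aligned, as in the
     * params blocks above. */
    static __m128i fold_constant_operand(const int32_t zero_point_product[4],
                                         int32_t y_multiplier, int8_t y) {
      const __m128i vbias = _mm_load_si128((const __m128i*) zero_point_product);
      /* Scalar multiply, then broadcast to all four 32-bit lanes. The SSE2
       * path broadcasts with a shuffle; the AVX2 kernels use
       * _mm_broadcastd_epi32 instead. */
      __m128i vy = _mm_cvtsi32_si128(y_multiplier * (int32_t) y);
      vy = _mm_shuffle_epi32(vy, _MM_SHUFFLE(0, 0, 0, 0));
      return _mm_add_epi32(vbias, vy);
    }

    int main(void) {
      _Alignas(16) int32_t zpp[4] = {100, 100, 100, 100};
      int32_t out[4];
      _mm_storeu_si128((__m128i*) out, fold_constant_operand(zpp, 3, 7));
      printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  /* 121 x4 */
      return 0;
    }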
/external/XNNPACK/src/qs8-vadd/gen/ |
D | minmax-avx2-mul32-ld64-x8.c |
    25  …uct = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.zero_point_product)…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
    26  …ltiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.x_multiplier));  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
    27  …ltiplier = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.y_multiplier));  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
    28  …r_mask = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_mask));  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
    29  …old = _mm256_broadcastsi128_si256(_mm_load_si128((const __m128i*) params->sse2.remainder_threshold…  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
    30  const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
    31  …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
    32  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
    33  const __m128i voutput_max = _mm_load_si128((const __m128i*) params->sse2.output_max);  in xnn_qs8_vadd_minmax_ukernel__avx2_mul32_ld64_x8()
|
D | minmax-sse41-mul16-ld64-x8.c |
    24  …const __m128i vzero_point_product = _mm_load_si128((const __m128i*) params->sse2.zero_point_produc…  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    25  const __m128i vx_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_lo);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    26  const __m128i vx_multiplier_hi = _mm_load_si128((const __m128i*) params->sse2.x_multiplier_hi);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    27  const __m128i vy_multiplier_lo = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_lo);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    28  const __m128i vy_multiplier_hi = _mm_load_si128((const __m128i*) params->sse2.y_multiplier_hi);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    29  const __m128i vremainder_mask = _mm_load_si128((const __m128i*) params->sse2.remainder_mask);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    30  …const __m128i vremainder_threshold = _mm_load_si128((const __m128i*) params->sse2.remainder_thresh…  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    31  const __m128i vshift = _mm_cvtsi32_si128((int) params->sse2.shift);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    32  …const __m128i voutput_zero_point = _mm_load_si128((const __m128i*) params->sse2.output_zero_point);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    33  const __m128i voutput_min = _mm_load_si128((const __m128i*) params->sse2.output_min);  in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x8()
    [all …]
|