
Searched refs:vzero (Results 1 – 25 of 67) sorted by relevance


/external/XNNPACK/src/q8-avgpool/
mp9p8q-sse2.c, in xnn_q8_avgpool_ukernel_mp9p8q__sse2():
    33  const __m128i vzero = _mm_setzero_si128();  // local definition
    62  const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero);
    63  const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero);
    64  const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero);
    65  const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero);
    66  const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero);
    67  const __m128i vxi5 = _mm_unpacklo_epi8(vi5, vzero);
    68  const __m128i vxi6 = _mm_unpacklo_epi8(vi6, vzero);
    69  const __m128i vxi7 = _mm_unpacklo_epi8(vi7, vzero);
    70  const __m128i vxi8 = _mm_unpacklo_epi8(vi8, vzero);
    [all …]
up9-sse2.c, in xnn_q8_avgpool_ukernel_up9__sse2():
    33  const __m128i vzero = _mm_setzero_si128();  // local definition
    86  const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero);
    87  const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero);
    88  const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero);
    89  const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero);
    90  const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero);
    91  const __m128i vxi5 = _mm_unpacklo_epi8(vi5, vzero);
    92  const __m128i vxi6 = _mm_unpacklo_epi8(vi6, vzero);
    93  const __m128i vxi7 = _mm_unpacklo_epi8(vi7, vzero);
    94  const __m128i vxi8 = _mm_unpacklo_epi8(vi8, vzero);
    [all …]
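
Note on the pattern these q8-avgpool hits share: SSE2 has no direct u8 -> u16
widening instruction (that only arrives with SSE4.1's _mm_cvtepu8_epi16), so the
kernels keep one all-zero register live and interleave input bytes with it;
_mm_unpacklo_epi8(v, vzero) zero-extends the low eight lanes to 16 bits. A
minimal sketch of the idiom, with illustrative names not taken from the kernels:

    #include <emmintrin.h>  /* SSE2 */

    /* Zero-extend the low 8 unsigned bytes of `input` to 16-bit lanes. */
    static inline __m128i widen_lo_u8_to_u16(__m128i input) {
      const __m128i vzero = _mm_setzero_si128();
      return _mm_unpacklo_epi8(input, vzero);
    }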
/external/XNNPACK/src/q8-gavgpool/
mp7p7q-sse2.c, in xnn_q8_gavgpool_ukernel_mp7p7q__sse2():
    40  const __m128i vzero = _mm_setzero_si128();  // local definition
    52  const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero);
    53  const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero);
    54  const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero);
    55  const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero);
    56  const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero);
    57  const __m128i vxi5 = _mm_unpacklo_epi8(vi5, vzero);
    58  const __m128i vxi6 = _mm_unpacklo_epi8(vi6, vzero);
    68  const __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));
    69  const __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));
    [all …]
up7-sse2.c, in xnn_q8_gavgpool_ukernel_up7__sse2():
    56  const __m128i vzero = _mm_setzero_si128();  // local definition
    70  const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero);
    71  const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero);
    72  const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero);
    73  const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero);
    74  const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero);
    75  const __m128i vxi5 = _mm_unpacklo_epi8(vi5, vzero);
    76  const __m128i vxi6 = _mm_unpacklo_epi8(vi6, vzero);
    86  __m128i vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));
    87  __m128i vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));
    [all …]
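
In these gavgpool hits vzero does double duty: the same register that widens u8
pixels to u16 also widens the 16-bit row sum to 32 bits before the bias is
added. Seven rows of u8 data sum to at most 7 * 255 = 1785, so the 16-bit
intermediate cannot overflow. A sketch of that second step, assuming vbias is a
pre-broadcast 32-bit bias vector:

    #include <emmintrin.h>

    /* Zero-extend a 16-bit partial sum to two 32-bit halves and add a bias. */
    static inline void widen_sum_add_bias(__m128i vsum, __m128i vbias,
                                          __m128i *vacc_lo, __m128i *vacc_hi) {
      const __m128i vzero = _mm_setzero_si128();
      *vacc_lo = _mm_add_epi32(vbias, _mm_unpacklo_epi16(vsum, vzero));
      *vacc_hi = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vsum, vzero));
    }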
/external/libaom/libaom/aom_dsp/x86/
sum_squares_avx2.c, in aom_var_2d_u8_avx2():
    111  __m256i vzero = _mm256_setzero_si256();  // local definition
    112  __m256i v_acc_sum = vzero;
    113  __m256i v_acc_sqs = vzero;
    127  __m256i vsrc0 = _mm256_unpacklo_epi8(vsrc[k], vzero);
    128  __m256i vsrc1 = _mm256_unpackhi_epi8(vsrc[k], vzero);
    141  v_acc_sum = vzero;
    142  v_acc_sqs = vzero;
    148  __m256i vsrc0 = _mm256_unpacklo_epi8(vsrc, vzero);
    149  __m256i vsrc1 = _mm256_unpackhi_epi8(vsrc, vzero);
    164  v_acc_sum = vzero;
    [all …]
sum_squares_sse2.c, in aom_var_2d_u8_sse2():
    229  __m128i vzero = _mm_setzero_si128();  // local definition
    230  __m128i v_acc_sum = vzero;
    231  __m128i v_acc_sqs = vzero;
    245  __m128i vsrc0 = _mm_unpacklo_epi8(vsrc[k], vzero);
    246  __m128i vsrc1 = _mm_unpackhi_epi8(vsrc[k], vzero);
    259  v_acc_sum = vzero;
    260  v_acc_sqs = vzero;
    266  __m128i vsrc0 = _mm_unpacklo_epi8(vsrc, vzero);
    267  __m128i vsrc1 = _mm_unpackhi_epi8(vsrc, vzero);
    282  v_acc_sum = vzero;
    [all …]
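
In the libaom variance kernels, vzero also serves as the reset value: the
narrow accumulators are periodically flushed to wider scalars and then
reassigned to vzero (the "v_acc_sum = vzero" hits) so their 16- and 32-bit
lanes never overflow. A sketch of one accumulation step under that scheme,
with illustrative names and the flush elided:

    #include <immintrin.h>  /* AVX2 */

    /* Add one 32-byte row of pixels into running sum and sum-of-squares. */
    static inline void accumulate_row_u8(__m256i vsrc, __m256i *v_acc_sum,
                                         __m256i *v_acc_sqs) {
      const __m256i vzero = _mm256_setzero_si256();
      const __m256i vsrc0 = _mm256_unpacklo_epi8(vsrc, vzero);  /* u8 -> u16 */
      const __m256i vsrc1 = _mm256_unpackhi_epi8(vsrc, vzero);
      *v_acc_sum = _mm256_add_epi16(*v_acc_sum,
                                    _mm256_add_epi16(vsrc0, vsrc1));
      /* madd(x, x) squares 16-bit lanes and sums pairs into 32-bit lanes. */
      *v_acc_sqs = _mm256_add_epi32(*v_acc_sqs,
                                    _mm256_madd_epi16(vsrc0, vsrc0));
      *v_acc_sqs = _mm256_add_epi32(*v_acc_sqs,
                                    _mm256_madd_epi16(vsrc1, vsrc1));
    }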
/external/XNNPACK/src/q8-dwconv/
up8x9-sse2.c, in xnn_q8_dwconv_ukernel_up8x9__sse2():
    25  const __m128i vzero = _mm_setzero_si128();  // local definition
    47  const __m128i vxi0 = _mm_unpacklo_epi8(vi0, vzero);
    49  const __m128i vxk0 = _mm_sub_epi16(_mm_unpacklo_epi8(vk0, vzero), vkernel_zero_point);
    56  const __m128i vxi1 = _mm_unpacklo_epi8(vi1, vzero);
    58  const __m128i vxk1 = _mm_sub_epi16(_mm_unpacklo_epi8(vk1, vzero), vkernel_zero_point);
    65  const __m128i vxi2 = _mm_unpacklo_epi8(vi2, vzero);
    67  const __m128i vxk2 = _mm_sub_epi16(_mm_unpacklo_epi8(vk2, vzero), vkernel_zero_point);
    74  const __m128i vxi3 = _mm_unpacklo_epi8(vi3, vzero);
    76  const __m128i vxk3 = _mm_sub_epi16(_mm_unpacklo_epi8(vk3, vzero), vkernel_zero_point);
    83  const __m128i vxi4 = _mm_unpacklo_epi8(vi4, vzero);
    [all …]
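
The dwconv hits (and the gemm/igemm hits below) add asymmetric quantization to
the widening idiom: input bytes are zero-extended as-is, while weight bytes are
zero-extended and then re-centered by subtracting the kernel zero point, giving
signed 16-bit values ready for multiply-accumulate. A sketch, assuming
vkernel_zero_point holds the zero point broadcast to every 16-bit lane:

    #include <emmintrin.h>
    #include <stdint.h>

    /* Load 8 kernel bytes, widen to 16 bits, and subtract the zero point. */
    static inline __m128i load_kernel_row(const uint8_t *k,
                                          __m128i vkernel_zero_point) {
      const __m128i vzero = _mm_setzero_si128();
      const __m128i vk = _mm_loadl_epi64((const __m128i *) k);
      return _mm_sub_epi16(_mm_unpacklo_epi8(vk, vzero), vkernel_zero_point);
    }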
/external/XNNPACK/src/q8-gemm/
4x4c2-sse2.c, in xnn_q8_gemm_ukernel_4x4c2__sse2():
    63  const __m128i vzero = _mm_setzero_si128();  // local definition
    67  const __m128i vxa0 = _mm_unpacklo_epi8(va0, vzero);
    70  const __m128i vxa1 = _mm_unpacklo_epi8(va1, vzero);
    73  const __m128i vxa2 = _mm_unpacklo_epi8(va2, vzero);
    76  const __m128i vxa3 = _mm_unpacklo_epi8(va3, vzero);
    80  const __m128i vxb0 = _mm_sub_epi16(_mm_unpacklo_epi8(vb0, vzero), vb_zero_point);
    92  const __m128i vxb1 = _mm_sub_epi16(_mm_unpacklo_epi8(vb1, vzero), vb_zero_point);
    104  const __m128i vxb2 = _mm_sub_epi16(_mm_unpacklo_epi8(vb2, vzero), vb_zero_point);
    116  const __m128i vxb3 = _mm_sub_epi16(_mm_unpacklo_epi8(vb3, vzero), vb_zero_point);
    133  const __m128i vxa0 = _mm_unpacklo_epi8(va0, vzero);
    [all …]
2x4c8-sse2.c, in xnn_q8_gemm_ukernel_2x4c8__sse2():
    75  const __m128i vzero = _mm_setzero_si128();  // local definition
    78  const __m128i vxa0 = _mm_unpacklo_epi8(va0, vzero);
    81  const __m128i vxa1 = _mm_unpacklo_epi8(va1, vzero);
    85  const __m128i vxb0 = _mm_sub_epi16(_mm_unpacklo_epi8(vb0, vzero), vb_zero_point);
    87  const __m128i vxb1 = _mm_sub_epi16(_mm_unpacklo_epi8(vb1, vzero), vb_zero_point);
    89  const __m128i vxb2 = _mm_sub_epi16(_mm_unpacklo_epi8(vb2, vzero), vb_zero_point);
    91  const __m128i vxb3 = _mm_sub_epi16(_mm_unpacklo_epi8(vb3, vzero), vb_zero_point);
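
Between these hits, the GEMM microkernels combine the two widened operands: vxa
(zero-extended activations) and vxb (zero-point-corrected weights) are both
16-bit, so a single _mm_madd_epi16 multiplies lane-wise and sums adjacent pairs
into 32-bit partial dot products. That step is not among the hits above; the
sketch below reconstructs it as an assumption, not the verbatim kernel:

    #include <emmintrin.h>

    /* Accumulate paired 16-bit products of vxa and vxb into 32-bit lanes. */
    static inline __m128i madd_accumulate(__m128i vacc,
                                          __m128i vxa, __m128i vxb) {
      return _mm_add_epi32(vacc, _mm_madd_epi16(vxa, vxb));
    }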
/external/XNNPACK/src/q8-igemm/
4x4c2-sse2.c, in xnn_q8_igemm_ukernel_4x4c2__sse2():
    60  const __m128i vzero = _mm_setzero_si128();  // local definition
    84  const __m128i vxa0 = _mm_unpacklo_epi8(va0, vzero);
    87  const __m128i vxa1 = _mm_unpacklo_epi8(va1, vzero);
    90  const __m128i vxa2 = _mm_unpacklo_epi8(va2, vzero);
    93  const __m128i vxa3 = _mm_unpacklo_epi8(va3, vzero);
    97  const __m128i vxb0 = _mm_sub_epi16(_mm_unpacklo_epi8(vb0, vzero), vb_zero_point);
    104  const __m128i vxb1 = _mm_sub_epi16(_mm_unpacklo_epi8(vb1, vzero), vb_zero_point);
    111  const __m128i vxb2 = _mm_sub_epi16(_mm_unpacklo_epi8(vb2, vzero), vb_zero_point);
    118  const __m128i vxb3 = _mm_sub_epi16(_mm_unpacklo_epi8(vb3, vzero), vb_zero_point);
    130  const __m128i vxa0 = _mm_unpacklo_epi8(va0, vzero);
    [all …]
/external/XNNPACK/src/f32-hswish/gen/
neonfma-x8.c, in xnn_f32_hswish_ukernel__neonfma_x8():
    30  const float32x4_t vzero = vdupq_n_f32(0.0f);  // local definition
    39  vacc0123 = vmaxq_f32(vacc0123, vzero);
    40  vacc4567 = vmaxq_f32(vacc4567, vzero);
    54  vacc0123 = vmaxq_f32(vacc0123, vzero);
    62  vacc0123 = vmaxq_f32(vacc0123, vzero);
psimd-x8.c, in xnn_f32_hswish_ukernel__psimd_x8():
    30  const psimd_f32 vzero = psimd_splat_f32(0.0f);  // local definition
    40  vacc0123 = psimd_max_f32(vacc0123, vzero);
    41  vacc4567 = psimd_max_f32(vacc4567, vzero);
    57  vacc0123 = psimd_max_f32(vacc0123, vzero);
    66  vacc0123 = psimd_max_f32(vacc0123, vzero);
neon-x8.c, in xnn_f32_hswish_ukernel__neon_x8():
    30  const float32x4_t vzero = vdupq_n_f32(0.0f);  // local definition
    39  vacc0123 = vmaxq_f32(vacc0123, vzero);
    40  vacc4567 = vmaxq_f32(vacc4567, vzero);
    54  vacc0123 = vmaxq_f32(vacc0123, vzero);
    62  vacc0123 = vmaxq_f32(vacc0123, vzero);
sse-x8.c, in xnn_f32_hswish_ukernel__sse_x8():
    30  const __m128 vzero = _mm_setzero_ps();  // local definition
    43  vacc0123 = _mm_max_ps(vacc0123, vzero);
    44  vacc4567 = _mm_max_ps(vacc4567, vzero);
    61  vacc0123 = _mm_max_ps(vacc0123, vzero);
    71  vacc0123 = _mm_max_ps(vacc0123, vzero);
avx512f-x32.c, in xnn_f32_hswish_ukernel__avx512f_x32():
    31  const __m512 vzero = _mm512_setzero_ps();  // local definition
    41  vacc0123456789ABCDEF = _mm512_max_ps(vacc0123456789ABCDEF, vzero);
    42  vaccGHIJKLMNOPQRSTUV = _mm512_max_ps(vaccGHIJKLMNOPQRSTUV, vzero);
    58  vacc = _mm512_max_ps(vacc, vzero);
    73  vacc = _mm512_max_ps(vacc, vzero);
fma3-x16.c, in xnn_f32_hswish_ukernel__fma3_x16():
    32  const __m256 vzero = _mm256_setzero_ps();  // local definition
    42  vacc01234567 = _mm256_max_ps(vacc01234567, vzero);
    43  vacc89ABCDEF = _mm256_max_ps(vacc89ABCDEF, vzero);
    59  vacc = _mm256_max_ps(vacc, vzero);
    72  vacc = _mm256_max_ps(vacc, vzero);
avx-x16.c, in xnn_f32_hswish_ukernel__avx_x16():
    32  const __m256 vzero = _mm256_setzero_ps();  // local definition
    45  vacc01234567 = _mm256_max_ps(vacc01234567, vzero);
    46  vacc89ABCDEF = _mm256_max_ps(vacc89ABCDEF, vzero);
    63  vacc = _mm256_max_ps(vacc, vzero);
    77  vacc = _mm256_max_ps(vacc, vzero);
psimd-x4.c, in xnn_f32_hswish_ukernel__psimd_x4():
    30  const psimd_f32 vzero = psimd_splat_f32(0.0f);  // local definition
    38  vacc0123 = psimd_max_f32(vacc0123, vzero);
    51  vacc0123 = psimd_max_f32(vacc0123, vzero);
    60  vacc0123 = psimd_max_f32(vacc0123, vzero);
avx512f-x16.c, in xnn_f32_hswish_ukernel__avx512f_x16():
    31  const __m512 vzero = _mm512_setzero_ps();  // local definition
    39  vacc0123456789ABCDEF = _mm512_max_ps(vacc0123456789ABCDEF, vzero);
    52  vacc = _mm512_max_ps(vacc, vzero);
    67  vacc = _mm512_max_ps(vacc, vzero);
neon-x4.c, in xnn_f32_hswish_ukernel__neon_x4():
    30  const float32x4_t vzero = vdupq_n_f32(0.0f);  // local definition
    37  vacc0123 = vmaxq_f32(vacc0123, vzero);
    48  vacc0123 = vmaxq_f32(vacc0123, vzero);
    56  vacc0123 = vmaxq_f32(vacc0123, vzero);
neonfma-x4.c, in xnn_f32_hswish_ukernel__neonfma_x4():
    30  const float32x4_t vzero = vdupq_n_f32(0.0f);  // local definition
    37  vacc0123 = vmaxq_f32(vacc0123, vzero);
    48  vacc0123 = vmaxq_f32(vacc0123, vzero);
    56  vacc0123 = vmaxq_f32(vacc0123, vzero);
sse-x4.c, in xnn_f32_hswish_ukernel__sse_x4():
    30  const __m128 vzero = _mm_setzero_ps();  // local definition
    40  vacc0123 = _mm_max_ps(vacc0123, vzero);
    54  vacc0123 = _mm_max_ps(vacc0123, vzero);
    64  vacc0123 = _mm_max_ps(vacc0123, vzero);
fma3-x8.c, in xnn_f32_hswish_ukernel__fma3_x8():
    32  const __m256 vzero = _mm256_setzero_ps();  // local definition
    40  vacc01234567 = _mm256_max_ps(vacc01234567, vzero);
    53  vacc = _mm256_max_ps(vacc, vzero);
    66  vacc = _mm256_max_ps(vacc, vzero);
avx-x8.c, in xnn_f32_hswish_ukernel__avx_x8():
    32  const __m256 vzero = _mm256_setzero_ps();  // local definition
    42  vacc01234567 = _mm256_max_ps(vacc01234567, vzero);
    56  vacc = _mm256_max_ps(vacc, vzero);
    70  vacc = _mm256_max_ps(vacc, vzero);
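
Across every ISA variant above, vzero is the lower clamp of the hswish inner
term: hswish(x) = x * min(max(x/6 + 1/2, 0), 1), equivalent to
x * relu6(x + 3) / 6. A four-lane SSE sketch of the whole kernel body, with
constants assumed to mirror the kernels' parameters:

    #include <emmintrin.h>

    /* hswish on four floats: clamp(x/6 + 1/2, 0, 1) * x. */
    static inline __m128 hswish_x4(__m128 vx) {
      const __m128 vsixth = _mm_set1_ps(1.0f / 6.0f);
      const __m128 vhalf  = _mm_set1_ps(0.5f);
      const __m128 vone   = _mm_set1_ps(1.0f);
      const __m128 vzero  = _mm_setzero_ps();
      __m128 vacc = _mm_add_ps(_mm_mul_ps(vx, vsixth), vhalf);
      vacc = _mm_max_ps(vacc, vzero);  /* the vzero hits above */
      vacc = _mm_min_ps(vacc, vone);
      return _mm_mul_ps(vacc, vx);
    }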
/external/XNNPACK/src/q8-vadd/
sse2.c, in xnn_q8_vadd_ukernel__sse2():
    32  const __m128i vzero = _mm_setzero_si128();  // local definition
    39  const __m128i vxa = _mm_unpacklo_epi8(va, vzero);
    40  const __m128i vxb = _mm_unpacklo_epi8(vb, vzero);
    81  const __m128i vxa = _mm_unpacklo_epi8(va, vzero);
    82  const __m128i vxb = _mm_unpacklo_epi8(vb, vzero);
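
The elementwise add uses the same widening ahead of its requantization
arithmetic (not among the hits): both operands are zero-extended against vzero
so the sum can be rescaled to the output's quantization parameters without
overflow. A sketch of just the widening step, with illustrative names:

    #include <emmintrin.h>
    #include <stdint.h>

    /* Widen 8 bytes from each operand to 16-bit lanes for requantization. */
    static inline void widen_operands(const uint8_t *a, const uint8_t *b,
                                      __m128i *vxa, __m128i *vxb) {
      const __m128i vzero = _mm_setzero_si128();
      const __m128i va = _mm_loadl_epi64((const __m128i *) a);
      const __m128i vb = _mm_loadl_epi64((const __m128i *) b);
      *vxa = _mm_unpacklo_epi8(va, vzero);
      *vxb = _mm_unpacklo_epi8(vb, vzero);
    }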
