
Searched refs: _mm_castps_si128 (Results 1 – 25 of 263) sorted by relevance

/external/XNNPACK/src/f32-f16-vcvt/gen/
vcvt-avx-x32.c
70 __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
71 __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
72 __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
73 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
74 __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
75 __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
76 __m128i vbias6 = _mm_add_epi32(_mm_castps_si128(vabsx6), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
77 __m128i vbias7 = _mm_add_epi32(_mm_castps_si128(vabsx7), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
88 const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x32()
89 const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x32()
[all …]
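
Every match above is the same idiom: _mm_castps_si128 is a zero-cost bitcast from __m128 to __m128i, which lets these f32→f16 kernels do integer arithmetic directly on raw IEEE-754 float bits. A minimal sketch of the bias-add step, with an illustrative exp_bias constant (an assumption; the real kernels load theirs from conversion parameters):

#include <emmintrin.h>  /* SSE2 */

static inline __m128i add_exp_bias(__m128 vabsx) {
  /* Illustrative constant, not copied from XNNPACK params. */
  const __m128i vexp_bias = _mm_set1_epi32(0x07800000);
  /* Reinterpret the four float lanes as int32 lanes, then add the bias. */
  return _mm_add_epi32(_mm_castps_si128(vabsx), vexp_bias);
}

The cast itself compiles to no instruction; only the _mm_add_epi32 is emitted.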
vcvt-sse2-x32.c
70 __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
71 __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
72 __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
73 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
74 __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
75 __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
76 __m128i vbias6 = _mm_add_epi32(_mm_castps_si128(vabsx6), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
77 __m128i vbias7 = _mm_add_epi32(_mm_castps_si128(vabsx7), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
88 const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
89 const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
[all …]
vcvt-sse41-x32.c
70 __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
71 __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
72 __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
73 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
74 __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
75 __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
76 __m128i vbias6 = _mm_add_epi32(_mm_castps_si128(vabsx6), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
77 __m128i vbias7 = _mm_add_epi32(_mm_castps_si128(vabsx7), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
88 const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
89 const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
[all …]
vcvt-sse2-x24.c
64 __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
65 __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
66 __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
67 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
68 __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
69 __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
78 const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
79 const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
80 const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
81 const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
[all …]
vcvt-sse41-x24.c
64 __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
65 __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
66 __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
67 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
68 __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
69 __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
78 const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
79 const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
80 const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
81 const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
[all …]
vcvt-avx-x24.c
64 __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
65 __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
66 __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
67 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
68 __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
69 __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
78 const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24()
79 const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24()
80 const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24()
81 const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24()
[all …]
vcvt-sse41-x16.c
58 __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
59 __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
60 __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
61 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
68 const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
69 const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
70 const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
71 const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
86 const __m128i vsignh0 = _mm_packs_epi32(_mm_castps_si128(vsignx0), _mm_castps_si128(vsignx1)); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
87 const __m128i vsignh1 = _mm_packs_epi32(_mm_castps_si128(vsignx2), _mm_castps_si128(vsignx3)); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
[all …]
vcvt-avx-x16.c
58 __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x16()
59 __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x16()
60 __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x16()
61 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x16()
68 const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x16()
69 const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x16()
70 const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x16()
71 const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x16()
86 const __m128i vsignh0 = _mm_packs_epi32(_mm_castps_si128(vsignx0), _mm_castps_si128(vsignx1)); in xnn_f32_f16_vcvt_ukernel__avx_x16()
87 const __m128i vsignh1 = _mm_packs_epi32(_mm_castps_si128(vsignx2), _mm_castps_si128(vsignx3)); in xnn_f32_f16_vcvt_ukernel__avx_x16()
[all …]
vcvt-sse2-x16.c
58 __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
59 __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
60 __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
61 __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
68 const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
69 const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
70 const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
71 const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
86 const __m128i vsignh0 = _mm_packs_epi32(_mm_castps_si128(vsignx0), _mm_castps_si128(vsignx1)); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
87 const __m128i vsignh1 = _mm_packs_epi32(_mm_castps_si128(vsignx2), _mm_castps_si128(vsignx3)); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
[all …]
vcvt-avx-x8.c
51 __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x8()
52 __m128i vbias_hi = _mm_add_epi32(_mm_castps_si128(vabsx_hi), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x8()
55 const __m128i vnanmaskw_lo = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_lo), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x8()
56 const __m128i vnanmaskw_hi = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_hi), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x8()
63 … const __m128i vsignh = _mm_packs_epi32(_mm_castps_si128(vsignx_lo), _mm_castps_si128(vsignx_hi)); in xnn_f32_f16_vcvt_ukernel__avx_x8()
71 __m128i vexpw_lo = _mm_srli_epi32(_mm_castps_si128(vf_lo), 13); in xnn_f32_f16_vcvt_ukernel__avx_x8()
72 __m128i vexpw_hi = _mm_srli_epi32(_mm_castps_si128(vf_hi), 13); in xnn_f32_f16_vcvt_ukernel__avx_x8()
73 const __m128i vmantw_lo = _mm_and_si128(_mm_castps_si128(vf_lo), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__avx_x8()
74 const __m128i vmantw_hi = _mm_and_si128(_mm_castps_si128(vf_hi), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__avx_x8()
101 __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x8()
[all …]
vcvt-sse2-x8.c
51 __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
52 __m128i vbias_hi = _mm_add_epi32(_mm_castps_si128(vabsx_hi), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
55 const __m128i vnanmaskw_lo = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_lo), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
56 const __m128i vnanmaskw_hi = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_hi), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
63 … const __m128i vsignh = _mm_packs_epi32(_mm_castps_si128(vsignx_lo), _mm_castps_si128(vsignx_hi)); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
73 __m128i vexpw_lo = _mm_srli_epi32(_mm_castps_si128(vf_lo), 13); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
74 __m128i vexpw_hi = _mm_srli_epi32(_mm_castps_si128(vf_hi), 13); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
75 const __m128i vmantw_lo = _mm_and_si128(_mm_castps_si128(vf_lo), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
76 const __m128i vmantw_hi = _mm_and_si128(_mm_castps_si128(vf_hi), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
101 __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
[all …]
vcvt-sse41-x8.c
51 __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
52 __m128i vbias_hi = _mm_add_epi32(_mm_castps_si128(vabsx_hi), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
55 const __m128i vnanmaskw_lo = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_lo), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
56 const __m128i vnanmaskw_hi = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_hi), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
63 … const __m128i vsignh = _mm_packs_epi32(_mm_castps_si128(vsignx_lo), _mm_castps_si128(vsignx_hi)); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
71 __m128i vexpw_lo = _mm_srli_epi32(_mm_castps_si128(vf_lo), 13); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
72 __m128i vexpw_hi = _mm_srli_epi32(_mm_castps_si128(vf_hi), 13); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
73 const __m128i vmantw_lo = _mm_and_si128(_mm_castps_si128(vf_lo), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
74 const __m128i vmantw_hi = _mm_and_si128(_mm_castps_si128(vf_hi), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
101 __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
[all …]
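
The x8 variants above also show the field-extraction half of the conversion: the rounded float is bitcast once, then split with a 13-bit right shift (which moves the f32 exponent into the f16 exponent position) and a mask for the surviving mantissa bits. A hedged sketch, with an assumed mask value:

#include <emmintrin.h>  /* SSE2 */

static inline void split_f16_fields(__m128 vf, __m128i* vexpw, __m128i* vmantw) {
  /* Assumed mask for the low mantissa bits; illustrative only. */
  const __m128i vmanth_mask = _mm_set1_epi32(0x00000FFF);
  const __m128i vbits = _mm_castps_si128(vf);   /* raw float bits */
  *vexpw  = _mm_srli_epi32(vbits, 13);          /* exponent, f16-aligned */
  *vmantw = _mm_and_si128(vbits, vmanth_mask);  /* residual mantissa bits */
}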
/external/XNNPACK/src/f32-dwconv2d-chw/gen/
3x3p1-minmax-ssse3-6x4.c
156 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
157 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
158 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
159 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
160 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
161 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
162 …st __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
163 …st __m128 vi7x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi7x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
193 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
194 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
[all …]
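
These dwconv2d matches use a different trick: _mm_alignr_epi8 exists only for integer vectors, so each float row chunk is bitcast with _mm_castps_si128, byte-shifted against its neighbor, and bitcast back with _mm_castsi128_ps, producing a window shifted by one lane. The snippets are cut off before the second operand; the sketch below assumes it is the preceding row chunk with a byte offset of 12, which selects lanes {3,4,5,6}:

#include <tmmintrin.h>  /* SSSE3 */

static inline __m128 window_3456(__m128 vx4567, __m128 vx0123) {
  /* Concatenate vx4567:vx0123 and drop the low 12 bytes, leaving
   * lanes {x3, x4, x5, x6} of the logical row. */
  return _mm_castsi128_ps(_mm_alignr_epi8(
      _mm_castps_si128(vx4567), _mm_castps_si128(vx0123), 12));
}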
3x3p1-minmax-ssse3-4x4.c
128 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
129 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
130 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
131 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
132 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
133 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
155 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
156 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
157 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
158 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
[all …]
3x3p1-minmax-ssse3-1x4.c
86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
130 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
131 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
132 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
139 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
[all …]
3x3p1-minmax-ssse3-3x4.c
114 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
115 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
116 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
117 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
118 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
136 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
137 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
138 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
139 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
140 …st __m128 vi4x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
[all …]
3x3p1-minmax-ssse3-5x4.c
142 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
143 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
144 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
145 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
146 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
147 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
148 …st __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
174 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
175 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
176 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
[all …]
3x3p1-minmax-ssse3-2x4.c
100 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
101 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
102 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
103 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
117 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
118 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
119 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
120 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
162 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
163 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
[all …]
3x3p1-minmax-ssse3-2x4-acc2.c
100 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
101 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
102 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
103 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
117 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
118 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
119 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
120 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
164 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
165 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
[all …]
3x3p1-minmax-ssse3-1x4-acc3.c
86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
132 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
133 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
134 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
141 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
[all …]
3x3p1-minmax-ssse3-1x4-acc2.c
86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
131 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
132 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
133 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
140 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
[all …]
3x3p1-minmax-ssse3-1x4-acc4.c
86 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
87 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
88 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
98 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
99 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
133 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
134 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
135 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128 in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
142 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
[all …]
/external/XNNPACK/src/f16-f32-vcvt/gen/
vcvt-sse41-int16-x32.c
63 …const __m128i vnorm0 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
64 …const __m128i vnorm1 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
65 …const __m128i vnorm2 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
66 …const __m128i vnorm3 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
67 …const __m128i vnorm4 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
68 …const __m128i vnorm5 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
69 …const __m128i vnorm6 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm6, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
70 …const __m128i vnorm7 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm6, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
72 …const __m128i vdenorm0 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
73 …const __m128i vdenorm1 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
[all …]
vcvt-avx-int16-x32.c
63 …const __m128i vnorm0 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
64 …const __m128i vnorm1 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
65 …const __m128i vnorm2 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
66 …const __m128i vnorm3 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
67 …const __m128i vnorm4 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
68 …const __m128i vnorm5 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
69 …const __m128i vnorm6 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm6, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
70 …const __m128i vnorm7 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm6, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
72 …const __m128i vdenorm0 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
73 …const __m128i vdenorm1 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
[all …]
vcvt-sse41-int16-x24.c
58 …const __m128i vnorm0 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
59 …const __m128i vnorm1 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
60 …const __m128i vnorm2 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
61 …const __m128i vnorm3 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
62 …const __m128i vnorm4 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
63 …const __m128i vnorm5 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
65 …const __m128i vdenorm0 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
66 …const __m128i vdenorm1 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
67 …const __m128i vdenorm2 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsign1… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
68 …const __m128i vdenorm3 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vnonsign1… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
[all …]
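
The f16→f32 matches go the other way: widened half-precision bit patterns are bitcast to float with _mm_castsi128_ps, multiplied by a power-of-two scale to undo the exponent re-bias, and the product is bitcast straight back with _mm_castps_si128 for further integer blending. A sketch under the assumption that the scale is 0x1.0p-112f (the kernels load it from params):

#include <emmintrin.h>  /* SSE2 */

static inline __m128i renormalize4(__m128i vprenorm_lo, __m128i vprenorm_hi) {
  const __m128 vexp_scale = _mm_set1_ps(0x1.0p-112f);  /* assumed constant */
  /* Interleave 16-bit halves into four full 32-bit float bit patterns. */
  const __m128i vw = _mm_unpacklo_epi16(vprenorm_lo, vprenorm_hi);
  /* Bitcast, rescale as floats, bitcast back for integer merging. */
  return _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(vw), vexp_scale));
}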
