/external/XNNPACK/src/f32-f16-vcvt/gen/ |
D | vcvt-avx-x32.c |
    70  __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    71  __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    72  __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    73  __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    74  __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    75  __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    76  __m128i vbias6 = _mm_add_epi32(_mm_castps_si128(vabsx6), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    77  __m128i vbias7 = _mm_add_epi32(_mm_castps_si128(vabsx7), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    88  const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    89  const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x32()
    [all …]
|
D | vcvt-sse2-x32.c |
    70  __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    71  __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    72  __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    73  __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    74  __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    75  __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    76  __m128i vbias6 = _mm_add_epi32(_mm_castps_si128(vabsx6), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    77  __m128i vbias7 = _mm_add_epi32(_mm_castps_si128(vabsx7), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    88  const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    89  const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x32()
    [all …]
|
D | vcvt-sse41-x32.c |
    70  __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    71  __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    72  __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    73  __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    74  __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    75  __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    76  __m128i vbias6 = _mm_add_epi32(_mm_castps_si128(vabsx6), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    77  __m128i vbias7 = _mm_add_epi32(_mm_castps_si128(vabsx7), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    88  const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    89  const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x32()
    [all …]
|
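The f32→f16 microkernels matched above all share one bit-manipulation scheme: the absolute-value bits of each float are reinterpreted as integers (_mm_castps_si128) so that an exponent bias can be added with _mm_add_epi32, and lanes whose magnitude bits exceed vexpw_max are flagged as NaN with _mm_cmpgt_epi32. For orientation only, below is a minimal scalar sketch of this style of round-to-nearest-even conversion. It follows the widely used FP16 reference-library formulation rather than the exact constants of these generated vector kernels; the function name is illustrative.

#include <math.h>
#include <stdint.h>
#include <string.h>

static inline uint32_t fp32_to_bits(float f) {
  uint32_t w;
  memcpy(&w, &f, sizeof(w));
  return w;
}

static inline float fp32_from_bits(uint32_t w) {
  float f;
  memcpy(&f, &w, sizeof(f));
  return f;
}

/* Illustrative scalar f32 -> f16 (IEEE binary16 bit pattern) conversion with
 * round-to-nearest-even, written after the FP16 reference library; it is not
 * the XNNPACK vector kernel itself. */
static inline uint16_t f32_to_f16_bits(float f) {
  const float scale_to_inf  = 0x1.0p+112f;   /* pushes overflow to +inf      */
  const float scale_to_zero = 0x1.0p-110f;   /* pulls result into f16 range  */
  float base = (fabsf(f) * scale_to_inf) * scale_to_zero;

  const uint32_t w      = fp32_to_bits(f);
  const uint32_t shl1_w = w + w;                      /* drop the sign bit    */
  const uint32_t sign   = w & UINT32_C(0x80000000);
  uint32_t bias = shl1_w & UINT32_C(0xFF000000);      /* exponent bits of |f| */
  if (bias < UINT32_C(0x71000000)) {
    bias = UINT32_C(0x71000000);                      /* clamp for denormals  */
  }

  /* Adding a biased power of two makes the float addition itself perform the
   * rounding to f16 precision. */
  base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base;
  const uint32_t bits          = fp32_to_bits(base);
  const uint32_t exp_bits      = (bits >> 13) & UINT32_C(0x00007C00);
  const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF);
  const uint32_t nonsign       = exp_bits + mantissa_bits;
  /* Inputs beyond the finite range (NaN) map to a canonical f16 NaN. */
  return (uint16_t) ((sign >> 16) |
                     (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign));
}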
D | vcvt-sse2-x24.c |
    64  __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    65  __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    66  __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    67  __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    68  __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    69  __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    78  const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    79  const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    80  const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    81  const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x24()
    [all …]
|
D | vcvt-sse41-x24.c |
    64  __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    65  __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    66  __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    67  __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    68  __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    69  __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    78  const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    79  const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    80  const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    81  const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x24()
    [all …]
|
D | vcvt-avx-x24.c |
    64  __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    65  __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    66  __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    67  __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    68  __m128i vbias4 = _mm_add_epi32(_mm_castps_si128(vabsx4), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    69  __m128i vbias5 = _mm_add_epi32(_mm_castps_si128(vabsx5), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    78  const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    79  const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    80  const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    81  const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x24()
    [all …]
|
D | vcvt-sse41-x16.c |
    58  __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    59  __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    60  __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    61  __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    68  const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    69  const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    70  const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    71  const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    86  const __m128i vsignh0 = _mm_packs_epi32(_mm_castps_si128(vsignx0), _mm_castps_si128(vsignx1)); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    87  const __m128i vsignh1 = _mm_packs_epi32(_mm_castps_si128(vsignx2), _mm_castps_si128(vsignx3)); in xnn_f32_f16_vcvt_ukernel__sse41_x16()
    [all …]
|
D | vcvt-avx-x16.c |
    58  __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    59  __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    60  __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    61  __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    68  const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    69  const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    70  const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    71  const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    86  const __m128i vsignh0 = _mm_packs_epi32(_mm_castps_si128(vsignx0), _mm_castps_si128(vsignx1)); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    87  const __m128i vsignh1 = _mm_packs_epi32(_mm_castps_si128(vsignx2), _mm_castps_si128(vsignx3)); in xnn_f32_f16_vcvt_ukernel__avx_x16()
    [all …]
|
D | vcvt-sse2-x16.c |
    58  __m128i vbias0 = _mm_add_epi32(_mm_castps_si128(vabsx0), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    59  __m128i vbias1 = _mm_add_epi32(_mm_castps_si128(vabsx1), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    60  __m128i vbias2 = _mm_add_epi32(_mm_castps_si128(vabsx2), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    61  __m128i vbias3 = _mm_add_epi32(_mm_castps_si128(vabsx3), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    68  const __m128i vnanmaskw0 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx0), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    69  const __m128i vnanmaskw1 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx1), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    70  const __m128i vnanmaskw2 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx2), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    71  const __m128i vnanmaskw3 = _mm_cmpgt_epi32(_mm_castps_si128(vabsx3), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    86  const __m128i vsignh0 = _mm_packs_epi32(_mm_castps_si128(vsignx0), _mm_castps_si128(vsignx1)); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    87  const __m128i vsignh1 = _mm_packs_epi32(_mm_castps_si128(vsignx2), _mm_castps_si128(vsignx3)); in xnn_f32_f16_vcvt_ukernel__sse2_x16()
    [all …]
|
D | vcvt-avx-x8.c |
    51  __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    52  __m128i vbias_hi = _mm_add_epi32(_mm_castps_si128(vabsx_hi), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    55  const __m128i vnanmaskw_lo = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_lo), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    56  const __m128i vnanmaskw_hi = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_hi), vexpw_max); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    63  … const __m128i vsignh = _mm_packs_epi32(_mm_castps_si128(vsignx_lo), _mm_castps_si128(vsignx_hi)); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    71  __m128i vexpw_lo = _mm_srli_epi32(_mm_castps_si128(vf_lo), 13); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    72  __m128i vexpw_hi = _mm_srli_epi32(_mm_castps_si128(vf_hi), 13); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    73  const __m128i vmantw_lo = _mm_and_si128(_mm_castps_si128(vf_lo), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    74  const __m128i vmantw_hi = _mm_and_si128(_mm_castps_si128(vf_hi), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    101 __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__avx_x8()
    [all …]
|
D | vcvt-sse2-x8.c |
    51  __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    52  __m128i vbias_hi = _mm_add_epi32(_mm_castps_si128(vabsx_hi), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    55  const __m128i vnanmaskw_lo = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_lo), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    56  const __m128i vnanmaskw_hi = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_hi), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    63  … const __m128i vsignh = _mm_packs_epi32(_mm_castps_si128(vsignx_lo), _mm_castps_si128(vsignx_hi)); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    73  __m128i vexpw_lo = _mm_srli_epi32(_mm_castps_si128(vf_lo), 13); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    74  __m128i vexpw_hi = _mm_srli_epi32(_mm_castps_si128(vf_hi), 13); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    75  const __m128i vmantw_lo = _mm_and_si128(_mm_castps_si128(vf_lo), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    76  const __m128i vmantw_hi = _mm_and_si128(_mm_castps_si128(vf_hi), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    101 __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse2_x8()
    [all …]
|
D | vcvt-sse41-x8.c |
    51  __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    52  __m128i vbias_hi = _mm_add_epi32(_mm_castps_si128(vabsx_hi), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    55  const __m128i vnanmaskw_lo = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_lo), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    56  const __m128i vnanmaskw_hi = _mm_cmpgt_epi32(_mm_castps_si128(vabsx_hi), vexpw_max); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    63  … const __m128i vsignh = _mm_packs_epi32(_mm_castps_si128(vsignx_lo), _mm_castps_si128(vsignx_hi)); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    71  __m128i vexpw_lo = _mm_srli_epi32(_mm_castps_si128(vf_lo), 13); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    72  __m128i vexpw_hi = _mm_srli_epi32(_mm_castps_si128(vf_hi), 13); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    73  const __m128i vmantw_lo = _mm_and_si128(_mm_castps_si128(vf_lo), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    74  const __m128i vmantw_hi = _mm_and_si128(_mm_castps_si128(vf_hi), vmanth_mask); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    101 __m128i vbias_lo = _mm_add_epi32(_mm_castps_si128(vabsx_lo), vexp_bias); in xnn_f32_f16_vcvt_ukernel__sse41_x8()
    [all …]
|
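The x16 and x8 variants above also collapse the per-lane 32-bit sign words into 16-bit half-precision sign bits with _mm_packs_epi32: each sign word is either 0 or 0x80000000 (INT32_MIN), so signed saturation maps it exactly to 0 or 0x8000. A standalone check of that property follows; the lane values are illustrative, not taken from the kernels.

#include <assert.h>
#include <emmintrin.h>  /* SSE2: _mm_packs_epi32, _mm_setr_epi32 */
#include <stdint.h>

int main(void) {
  /* Each 32-bit lane holds a float sign word: 0x80000000 (INT32_MIN) for a
   * negative input, 0 for a non-negative one. */
  const __m128i vsignx_lo = _mm_setr_epi32(INT32_MIN, 0, INT32_MIN, 0);
  const __m128i vsignx_hi = _mm_setr_epi32(0, 0, INT32_MIN, INT32_MIN);

  /* Signed saturation maps INT32_MIN -> INT16_MIN (0x8000) and 0 -> 0, so the
   * packed result carries the f16 sign bit in each 16-bit lane. */
  const __m128i vsignh = _mm_packs_epi32(vsignx_lo, vsignx_hi);

  uint16_t out[8];
  _mm_storeu_si128((__m128i*) out, vsignh);
  const uint16_t expected[8] = { 0x8000, 0, 0x8000, 0, 0, 0, 0x8000, 0x8000 };
  for (int i = 0; i < 8; i++) {
    assert(out[i] == expected[i]);
  }
  return 0;
}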
/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-ssse3-6x4.c |
    156 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    157 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    158 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    159 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    160 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    161 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    162 …st __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    163 …st __m128 vi7x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi7x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    193 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    194 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
    [all …]
|
D | 3x3p1-minmax-ssse3-4x4.c |
    128 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    129 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    130 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    131 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    132 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    133 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    155 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    156 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    157 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    158 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
    [all …]
|
D | 3x3p1-minmax-ssse3-1x4.c |
    86  …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    87  …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    88  …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    98  …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    99  …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    130 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    131 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    132 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    139 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
    [all …]
|
D | 3x3p1-minmax-ssse3-3x4.c |
    114 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    115 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    116 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    117 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    118 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    136 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    137 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    138 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    139 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    140 …st __m128 vi4x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4()
    [all …]
|
D | 3x3p1-minmax-ssse3-5x4.c |
    142 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    143 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    144 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    145 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    146 …st __m128 vi4x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi4x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    147 …st __m128 vi5x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi5x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    148 …st __m128 vi6x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi6x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    174 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    175 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    176 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
    [all …]
|
D | 3x3p1-minmax-ssse3-2x4.c |
    100 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    101 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    102 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    103 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    117 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    118 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    119 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    120 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    162 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    163 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4()
    [all …]
|
D | 3x3p1-minmax-ssse3-2x4-acc2.c |
    100 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    101 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    102 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    103 …st __m128 vi3x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    117 …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    118 …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    119 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    120 …st __m128 vi3x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi3x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    164 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    165 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_2x4_acc2()
    [all …]
|
D | 3x3p1-minmax-ssse3-1x4-acc3.c |
    86  …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    87  …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    88  …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    98  …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    99  …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    132 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    133 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    134 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    141 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc3()
    [all …]
|
D | 3x3p1-minmax-ssse3-1x4-acc2.c |
    86  …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    87  …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    88  …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    98  …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    99  …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    131 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    132 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    133 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    140 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc2()
    [all …]
|
D | 3x3p1-minmax-ssse3-1x4-acc4.c |
    86  …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    87  …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    88  …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    98  …st __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    99  …st __m128 vi1x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    100 …st __m128 vi2x5678 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x89AB), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    133 …st __m128 vi0x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    134 …st __m128 vi1x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi1x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    135 …st __m128 vi2x3456 = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(vi2x4567), _mm_castps_si128… in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    142 … const __m128 vi0x5678 = _mm_castsi128_ps(_mm_alignr_epi8(vzero, _mm_castps_si128(vi0x4567), 4)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4_acc4()
    [all …]
|
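The 3x3 depthwise-convolution kernels matched above build shifted input windows without reloading memory: _mm_alignr_epi8 (SSSE3 PALIGNR) concatenates two adjacent 4-float registers, reinterpreted as integers via _mm_castps_si128, and extracts 16 bytes at a fixed offset, so {x4..x7}:{x0..x3} shifted by 12 bytes yields lanes {x3..x6}. A standalone demonstration of that lane arithmetic follows; the input values are illustrative, not from the kernels.

#include <stdio.h>
#include <tmmintrin.h>  /* SSSE3: _mm_alignr_epi8 */

int main(void) {
  /* Three adjacent groups of four input pixels: lanes {0..3}, {4..7}, {8..11}. */
  const __m128 vi0x0123 = _mm_setr_ps(0.0f, 1.0f, 2.0f, 3.0f);
  const __m128 vi0x4567 = _mm_setr_ps(4.0f, 5.0f, 6.0f, 7.0f);
  const __m128 vi0x89AB = _mm_setr_ps(8.0f, 9.0f, 10.0f, 11.0f);

  /* PALIGNR treats (arg0:arg1) as one 32-byte value and keeps 16 bytes starting
   * at the given byte offset; offset 12 selects float lanes {3,4,5,6}. */
  const __m128 vi0x3456 = _mm_castsi128_ps(
      _mm_alignr_epi8(_mm_castps_si128(vi0x4567), _mm_castps_si128(vi0x0123), 12));
  /* Offset 4 on the next pair selects lanes {5,6,7,8}. */
  const __m128 vi0x5678 = _mm_castsi128_ps(
      _mm_alignr_epi8(_mm_castps_si128(vi0x89AB), _mm_castps_si128(vi0x4567), 4));

  float a[4], b[4];
  _mm_storeu_ps(a, vi0x3456);
  _mm_storeu_ps(b, vi0x5678);
  printf("x3456 = %g %g %g %g\n", a[0], a[1], a[2], a[3]);  /* expect 3 4 5 6 */
  printf("x5678 = %g %g %g %g\n", b[0], b[1], b[2], b[3]);  /* expect 5 6 7 8 */
  return 0;
}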
/external/XNNPACK/src/f16-f32-vcvt/gen/ |
D | vcvt-sse41-int16-x32.c |
    63  …const __m128i vnorm0 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    64  …const __m128i vnorm1 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    65  …const __m128i vnorm2 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    66  …const __m128i vnorm3 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    67  …const __m128i vnorm4 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    68  …const __m128i vnorm5 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    69  …const __m128i vnorm6 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm6, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    70  …const __m128i vnorm7 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm6, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    72  …const __m128i vdenorm0 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    73  …const __m128i vdenorm1 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x32()
    [all …]
|
D | vcvt-avx-int16-x32.c |
    63  …const __m128i vnorm0 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    64  …const __m128i vnorm1 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    65  …const __m128i vnorm2 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    66  …const __m128i vnorm3 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    67  …const __m128i vnorm4 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    68  …const __m128i vnorm5 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    69  …const __m128i vnorm6 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm6, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    70  …const __m128i vnorm7 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm6, … in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    72  …const __m128i vdenorm0 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    73  …const __m128i vdenorm1 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__avx_int16_x32()
    [all …]
|
D | vcvt-sse41-int16-x24.c |
    58  …const __m128i vnorm0 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    59  …const __m128i vnorm1 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm0, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    60  …const __m128i vnorm2 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    61  …const __m128i vnorm3 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm2, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    62  …const __m128i vnorm4 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    63  …const __m128i vnorm5 = _mm_castps_si128(_mm_mul_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vprenorm4, … in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    65  …const __m128i vdenorm0 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    66  …const __m128i vdenorm1 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vnonsign0… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    67  …const __m128i vdenorm2 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpacklo_epi16(vnonsign1… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    68  …const __m128i vdenorm3 = _mm_castps_si128(_mm_sub_ps(_mm_castsi128_ps(_mm_unpackhi_epi16(vnonsign1… in xnn_f16_f32_vcvt_ukernel__sse41_int16_x24()
    [all …]
|
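The f16→f32 kernels matched above take the reverse path: 16-bit half values are widened into 32-bit lanes with _mm_unpacklo_epi16/_mm_unpackhi_epi16, normal values are renormalized by a float multiply (the vnorm lines), and denormals are recovered by subtracting a magic bias (the vdenorm lines). For orientation only, a minimal scalar sketch of the same bit-level idea, again written after the well-known FP16 reference-library formulation rather than the exact vector code; the function name is illustrative.

#include <stdint.h>
#include <string.h>

static inline uint32_t fp32_to_bits(float f) {
  uint32_t w;
  memcpy(&w, &f, sizeof(w));
  return w;
}

static inline float fp32_from_bits(uint32_t w) {
  float f;
  memcpy(&f, &w, sizeof(f));
  return f;
}

/* Illustrative scalar IEEE binary16 bits -> f32 conversion (after the FP16
 * reference library), not the XNNPACK vector kernel itself. */
static inline float f16_bits_to_f32(uint16_t h) {
  const uint32_t w     = (uint32_t) h << 16;
  const uint32_t sign  = w & UINT32_C(0x80000000);
  const uint32_t two_w = w + w;                       /* drop the sign bit */

  /* Normal/Inf/NaN path: reposition exponent+mantissa, then rescale by 2**-112
   * to account for the exponent-bias difference (127 - 15). */
  const uint32_t exp_offset = UINT32_C(0xE0) << 23;
  const float exp_scale = 0x1.0p-112f;
  const float normalized_value =
      fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale;

  /* Denormal path: plant the half mantissa next to 0.5f, then subtract 0.5f. */
  const uint32_t magic_mask = UINT32_C(126) << 23;
  const float magic_bias = 0.5f;
  const float denormalized_value =
      fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias;

  /* The half exponent field is zero (denormal or zero input) below this cutoff. */
  const uint32_t denormalized_cutoff = UINT32_C(1) << 27;
  const uint32_t result = sign |
      (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value)
                                   : fp32_to_bits(normalized_value));
  return fp32_from_bits(result);
}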