/external/XNNPACK/src/qs8-gemm/gen/ |
D | 3x4c8-minmax-xop-ld128.c | 114 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 115 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 116 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 117 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 118 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 119 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 121 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 122 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128() 123 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128()
|
D | 3x4c8-minmax-sse41-ld128.c | 109 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() 110 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() 111 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() 112 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() 113 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() 114 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() 116 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() 117 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128() 118 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128()
|
D | 3x4c8-minmax-xop-ld64.c | 116 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() 117 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() 118 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() 119 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() 120 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() 121 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() 123 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() 124 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64() 125 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64()
|
D | 3x4c8-xw-minmax-xop.c | 112 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() 113 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() 114 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() 115 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() 116 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() 117 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() 119 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() 120 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop() 121 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop()
|
D | 3x4c8-minmax-sse41-ld64.c | 111 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() 112 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() 113 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() 114 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() 115 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() 116 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() 118 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() 119 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64() 120 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64()
|
D | 3x4c8-xw-minmax-sse41.c | 107 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() 108 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() 109 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() 110 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() 111 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() 112 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() 114 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() 115 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41() 116 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41()
|
D | 3x4c8-xw-minmax-ssse3.c | 107 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() 108 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() 109 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() 110 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() 111 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() 112 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() 114 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() 115 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3() 116 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3()
|
D | 2x4c8-xw-minmax-xop.c | 95 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() 96 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() 97 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() 98 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() 100 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop() 101 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop()
|
D | 3x4c8-minmax-ssse3-ld128.c | 109 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() 110 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() 111 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() 112 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() 113 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() 114 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() 116 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() 117 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128() 118 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128()
|
D | 3x4c8-minmax-ssse3-ld64.c | 111 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() 112 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() 113 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() 114 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() 115 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() 116 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() 118 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() 119 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64() 120 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64()
|
D | 2x4c8-minmax-sse41-ld64.c | 94 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() 95 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() 96 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() 97 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() 99 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64() 100 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64()
|
D | 2x4c8-xw-minmax-sse41.c | 90 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41() 91 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41() 92 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41() 93 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41() 95 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41() 96 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41()
|
D | 2x4c8-minmax-xop-ld128.c | 97 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() 98 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() 99 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() 100 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() 102 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128() 103 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128()
|
D | 2x4c8-minmax-sse41-ld128.c | 92 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128() 93 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128() 94 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128() 95 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128() 97 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128() 98 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 3x4c8-minmax-xop-ld64.c | 133 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() 134 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() 135 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() 136 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() 137 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() 138 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() 140 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() 141 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64() 142 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld64()
|
D | 3x4c8-minmax-xop-ld128.c | 131 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() 132 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() 133 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() 134 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() 135 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() 136 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() 138 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() 139 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128() 140 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__xop_ld128()
|
D | 3x4c8-minmax-sse41-ld64.c | 128 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() 129 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() 130 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() 131 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() 132 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() 133 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() 135 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() 136 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64() 137 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld64()
|
D | 3x4c8-minmax-sse41-ld128.c | 126 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() 127 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() 128 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() 129 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() 130 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() 131 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() 133 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() 134 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128() 135 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__sse41_ld128()
|
D | 3x4c8-minmax-ssse3-ld64.c | 128 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() 129 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() 130 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() 131 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() 132 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() 133 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() 135 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() 136 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64() 137 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld64()
|
D | 3x4c8-minmax-ssse3-ld128.c | 126 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() 127 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() 128 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() 129 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() 130 const __m128i vacc2x01 = _mm_hadd_epi32(vacc2x0, vacc2x1); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() 131 const __m128i vacc2x23 = _mm_hadd_epi32(vacc2x2, vacc2x3); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() 133 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() 134 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128() 135 __m128i vacc2x0123 = _mm_hadd_epi32(vacc2x01, vacc2x23); in xnn_qs8_igemm_minmax_ukernel_3x4c8__ssse3_ld128()
|
D | 2x4c8-minmax-xop-ld64.c | 114 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() 115 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() 116 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() 117 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() 119 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64() 120 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld64()
|
D | 2x4c8-minmax-xop-ld128.c | 112 const __m128i vacc0x01 = _mm_hadd_epi32(vacc0x0, vacc0x1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128() 113 const __m128i vacc0x23 = _mm_hadd_epi32(vacc0x2, vacc0x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128() 114 const __m128i vacc1x01 = _mm_hadd_epi32(vacc1x0, vacc1x1); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128() 115 const __m128i vacc1x23 = _mm_hadd_epi32(vacc1x2, vacc1x3); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128() 117 __m128i vacc0x0123 = _mm_hadd_epi32(vacc0x01, vacc0x23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128() 118 __m128i vacc1x0123 = _mm_hadd_epi32(vacc1x01, vacc1x23); in xnn_qs8_igemm_minmax_ukernel_2x4c8__xop_ld128()
|
/external/libgav1/libgav1/src/dsp/x86/ |
D | super_res_sse4.cc | 259 a[0] = _mm_hadd_epi32(weighted_src[0], weighted_src[1]); in SuperRes_SSE4_1() 260 a[1] = _mm_hadd_epi32(weighted_src[2], weighted_src[3]); in SuperRes_SSE4_1() 261 a[2] = _mm_hadd_epi32(weighted_src[4], weighted_src[5]); in SuperRes_SSE4_1() 262 a[3] = _mm_hadd_epi32(weighted_src[6], weighted_src[7]); in SuperRes_SSE4_1() 264 a[0] = _mm_hadd_epi32(a[0], a[1]); in SuperRes_SSE4_1() 265 a[1] = _mm_hadd_epi32(a[2], a[3]); in SuperRes_SSE4_1()
|
/external/libaom/libaom/av1/common/x86/ |
D | av1_convolve_horiz_rs_sse4.c | 105 const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32); in av1_convolve_horiz_rs_sse4_1() 106 const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32); in av1_convolve_horiz_rs_sse4_1() 108 const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32); in av1_convolve_horiz_rs_sse4_1() 208 const __m128i conv01_32 = _mm_hadd_epi32(conv0_32, conv1_32); in av1_highbd_convolve_horiz_rs_sse4_1() 209 const __m128i conv23_32 = _mm_hadd_epi32(conv2_32, conv3_32); in av1_highbd_convolve_horiz_rs_sse4_1() 211 const __m128i conv0123_32 = _mm_hadd_epi32(conv01_32, conv23_32); in av1_highbd_convolve_horiz_rs_sse4_1()
|
/external/libaom/libaom/aom_dsp/x86/ |
D | highbd_variance_sse4.c | 54 s2 = _mm_hadd_epi32(t0, t1); in variance4x4_64_sse4_1() 55 s3 = _mm_hadd_epi32(s2, s2); in variance4x4_64_sse4_1() 56 y0 = _mm_hadd_epi32(s3, s3); in variance4x4_64_sse4_1() 61 s2 = _mm_hadd_epi32(t0, t1); in variance4x4_64_sse4_1() 62 s3 = _mm_hadd_epi32(s2, s2); in variance4x4_64_sse4_1() 63 x0 = _mm_hadd_epi32(s3, s3); in variance4x4_64_sse4_1()
|