Home
last modified time | relevance | path

Searched refs:_mm_cvtepi8_epi16 (Results 1 – 25 of 105) sorted by relevance

12345

/external/XNNPACK/src/qs8-gavgpool/gen/
D7p7x-minmax-sse41-c24-acc2.c44 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
45 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
46 const __m128i vxi0xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
48 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
49 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
50 const __m128i vxi1xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
52 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
53 const __m128i vxi2x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
54 const __m128i vxi2xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
56 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c24_acc2()
[all …]
D7p7x-minmax-sse41-c16-acc2.c44 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
45 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
47 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
48 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
50 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
51 const __m128i vxi2x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
53 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
54 const __m128i vxi3x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i3 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
56 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
57 const __m128i vxi4x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i4 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c16_acc2()
[all …]
D7p7x-minmax-sse41-c8-acc2.c44 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
46 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
48 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
50 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
52 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
54 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
56 const __m128i vxi6x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i6)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
90 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
92 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
94 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse41_c8_acc2()
[all …]
D7x-minmax-sse41-c24-acc2.c61 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
62 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
63 const __m128i vxi0xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
65 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
66 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
67 const __m128i vxi1xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
69 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
70 const __m128i vxi2x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
71 const __m128i vxi2xGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 16))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
73 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
[all …]
D7x-minmax-sse41-c16-acc2.c61 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
62 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i0 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
64 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
65 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i1 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
67 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
68 const __m128i vxi2x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i2 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
70 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
71 const __m128i vxi3x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i3 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
73 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
74 const __m128i vxi4x89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (i4 + 8))); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c16_acc2()
[all …]
D7x-minmax-sse41-c8-acc2.c61 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
63 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
65 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
67 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i3)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
69 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i4)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
71 const __m128i vxi5x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i5)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
73 const __m128i vxi6x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i6)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
128 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i0)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
130 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i1)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
132 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) i2)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c8_acc2()
[all …]
/external/XNNPACK/src/qs8-dwconv/gen/
Dup24x9-minmax-sse41-mul16.c92 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(vi0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
94 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
96 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(vi0x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
98 const __m128i vxk0x89ABCDEF = _mm_cvtepi8_epi16(vk0x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
100 const __m128i vxi0xGHIJKLMN = _mm_cvtepi8_epi16(vi0xGHIJKLMN); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
102 const __m128i vxk0xGHIJKLMN = _mm_cvtepi8_epi16(vk0xGHIJKLMN); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
121 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(vi1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
123 const __m128i vxk1x01234567 = _mm_cvtepi8_epi16(vk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
125 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(vi1x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
127 const __m128i vxk1x89ABCDEF = _mm_cvtepi8_epi16(vk1x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up24x9__sse41_mul16()
[all …]
Dup16x9-minmax-sse41-mul16.c90 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(vi0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
92 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
94 const __m128i vxi0x89ABCDEF = _mm_cvtepi8_epi16(vi0x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
96 const __m128i vxk0x89ABCDEF = _mm_cvtepi8_epi16(vk0x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
111 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(vi1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
113 const __m128i vxk1x01234567 = _mm_cvtepi8_epi16(vk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
115 const __m128i vxi1x89ABCDEF = _mm_cvtepi8_epi16(vi1x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
117 const __m128i vxk1x89ABCDEF = _mm_cvtepi8_epi16(vk1x89ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
132 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(vi2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
134 const __m128i vxk2x01234567 = _mm_cvtepi8_epi16(vk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up16x9__sse41_mul16()
[all …]
Dup8x9-minmax-sse41-mul16.c88 const __m128i vxi0x01234567 = _mm_cvtepi8_epi16(vi0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
90 const __m128i vxk0x01234567 = _mm_cvtepi8_epi16(vk0x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
101 const __m128i vxi1x01234567 = _mm_cvtepi8_epi16(vi1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
103 const __m128i vxk1x01234567 = _mm_cvtepi8_epi16(vk1x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
114 const __m128i vxi2x01234567 = _mm_cvtepi8_epi16(vi2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
116 const __m128i vxk2x01234567 = _mm_cvtepi8_epi16(vk2x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
127 const __m128i vxi3x01234567 = _mm_cvtepi8_epi16(vi3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
129 const __m128i vxk3x01234567 = _mm_cvtepi8_epi16(vk3x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
140 const __m128i vxi4x01234567 = _mm_cvtepi8_epi16(vi4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
142 const __m128i vxk4x01234567 = _mm_cvtepi8_epi16(vk4x01234567); in xnn_qs8_dwconv_minmax_ukernel_up8x9__sse41_mul16()
[all …]
/external/XNNPACK/src/qs8-gemm/gen/
D1x4c2-minmax-xop-ld64.c55 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64()
59 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64()
64 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64()
69 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64()
74 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64()
84 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64()
88 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64()
96 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64()
104 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64()
D1x4c2-minmax-sse41-ld64.c50 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64()
54 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64()
59 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64()
64 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64()
69 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64()
79 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64()
83 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64()
91 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64()
99 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64()
D4x4c2-minmax-xop-ld64.c76 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
79 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
82 const __m128i vxa2 = _mm_cvtepi8_epi16(va2); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
85 const __m128i vxa3 = _mm_cvtepi8_epi16(va3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
89 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
100 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
111 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
122 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
138 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
141 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64()
[all …]
D4x4c2-minmax-sse41-ld64.c71 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
74 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
77 const __m128i vxa2 = _mm_cvtepi8_epi16(va2); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
80 const __m128i vxa3 = _mm_cvtepi8_epi16(va3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
84 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
95 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
106 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
117 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
133 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
136 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64()
[all …]
D4x4c2-minmax-xop-ld128.c76 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
79 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
82 const __m128i vxa2 = _mm_cvtepi8_epi16(va2); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
85 const __m128i vxa3 = _mm_cvtepi8_epi16(va3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
138 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
141 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
144 const __m128i vxa2 = _mm_cvtepi8_epi16(va2); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
147 const __m128i vxa3 = _mm_cvtepi8_epi16(va3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
151 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
165 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128()
[all …]
D4x4c2-minmax-sse41-ld128.c71 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
74 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
77 const __m128i vxa2 = _mm_cvtepi8_epi16(va2); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
80 const __m128i vxa3 = _mm_cvtepi8_epi16(va3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
133 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
136 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
139 const __m128i vxa2 = _mm_cvtepi8_epi16(va2); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
142 const __m128i vxa3 = _mm_cvtepi8_epi16(va3); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
146 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
160 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128()
[all …]
D1x4c8-minmax-sse41-ld64.c53 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64()
57 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64()
61 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64()
65 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64()
69 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64()
D1x4c8-minmax-xop-ld64.c58 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64()
62 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64()
66 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64()
70 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64()
74 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64()
/external/XNNPACK/src/qs8-igemm/gen/
D4x4c2-minmax-sse41-ld64.c88 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
91 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
94 const __m128i vxa2 = _mm_cvtepi8_epi16(va2); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
97 const __m128i vxa3 = _mm_cvtepi8_epi16(va3); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
101 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
112 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
123 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
134 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
150 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
153 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_igemm_minmax_ukernel_4x4c2__sse41_ld64()
[all …]
D4x4c2-minmax-xop-ld64.c93 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
96 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
99 const __m128i vxa2 = _mm_cvtepi8_epi16(va2); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
102 const __m128i vxa3 = _mm_cvtepi8_epi16(va3); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
106 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
117 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
128 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
139 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
155 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
158 const __m128i vxa1 = _mm_cvtepi8_epi16(va1); in xnn_qs8_igemm_minmax_ukernel_4x4c2__xop_ld64()
[all …]
D1x4c2-minmax-sse41-ld64.c61 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64()
65 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64()
70 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64()
75 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64()
80 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64()
90 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_1x4c2__sse41_ld64()
D1x4c2-minmax-xop-ld64.c66 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64()
70 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64()
75 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64()
80 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64()
85 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64()
95 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_1x4c2__xop_ld64()
D1x4c8-minmax-xop-ld64.c69 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64()
73 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64()
77 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64()
81 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64()
85 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_1x4c8__xop_ld64()
D1x4c8-minmax-sse41-ld64.c64 const __m128i vxa0 = _mm_cvtepi8_epi16(va0); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64()
68 const __m128i vxb0 = _mm_cvtepi8_epi16(vb0); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64()
72 const __m128i vxb1 = _mm_cvtepi8_epi16(vb1); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64()
76 const __m128i vxb2 = _mm_cvtepi8_epi16(vb2); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64()
80 const __m128i vxb3 = _mm_cvtepi8_epi16(vb3); in xnn_qs8_igemm_minmax_ukernel_1x4c8__sse41_ld64()
/external/XNNPACK/src/qs8-vadd/gen/
Dminmax-sse41-mul16-ld64-x32.c37 const __m128i vx01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
38 const __m128i vy01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
39 const __m128i vx89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_x + 8))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
40 const __m128i vy89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_y + 8))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
41 const __m128i vxGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_x + 16))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
42 const __m128i vyGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_y + 16))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
43 const __m128i vxOPQRSTUV = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_x + 24))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
44 const __m128i vyOPQRSTUV = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_y + 24))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
144 const __m128i vx01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
145 const __m128i vy01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x32()
Dminmax-sse41-mul16-ld64-x24.c37 const __m128i vx01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
38 const __m128i vy01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
39 const __m128i vx89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_x + 8))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
40 const __m128i vy89ABCDEF = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_y + 8))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
41 const __m128i vxGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_x + 16))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
42 const __m128i vyGHIJKLMN = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) (input_y + 16))); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
123 const __m128i vx01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) input_x)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()
124 const __m128i vy01234567 = _mm_cvtepi8_epi16(_mm_loadl_epi64((const __m128i*) input_y)); in xnn_qs8_vadd_minmax_ukernel__sse41_mul16_ld64_x24()

12345