Home
last modified time | relevance | path

Searched refs:_mm512_add_epi32 (Results 1 – 25 of 35) sorted by relevance

12

/external/XNNPACK/src/qs8-gemm/gen/
D4x16c8-minmax-avx512skx.c104 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
105 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
106 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
107 vacc3x0123 = _mm512_add_epi32(vacc3x0123, _mm512_madd_epi16(va3, vb0123)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
110 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
111 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
112 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
113 vacc3x4567 = _mm512_add_epi32(vacc3x4567, _mm512_madd_epi16(va3, vb4567)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
116 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
117 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx()
[all …]
D3x16c8-minmax-avx512skx.c92 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
93 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
94 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
97 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
98 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
99 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
102 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
103 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
104 vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
107 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx()
[all …]
D2x16c8-minmax-avx512skx.c80 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
81 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
84 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
85 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
88 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
89 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
92 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
93 vacc1xCDEF = _mm512_add_epi32(vacc1xCDEF, _mm512_madd_epi16(va1, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
99 …const __m512i vacc0x04152637 = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x0123, vacc0x4567), _mm… in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
100 …const __m512i vacc0x8C9DAEBF = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x89AB, vacc0xCDEF), _mm… in xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx()
[all …]
D1x16c8-minmax-avx512skx.c68 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
71 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
74 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
77 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
83 …const __m512i vacc0x04152637 = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x0123, vacc0x4567), _mm… in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
84 …const __m512i vacc0x8C9DAEBF = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x89AB, vacc0xCDEF), _mm… in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
86 …__m512i vacc0x084C195D2A6E3B7F = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x04152637, vacc0x8C9D… in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
100_mm512_add_epi32(_mm512_and_si512(vq31prod0x084C195D2A6E3B7F, vremainder_mask), _mm512_srai_epi32(… in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
/external/XNNPACK/src/qs8-igemm/gen/
D4x16c8-minmax-avx512skx.c119 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
120 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
121 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
122 vacc3x0123 = _mm512_add_epi32(vacc3x0123, _mm512_madd_epi16(va3, vb0123)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
125 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
126 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
127 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
128 vacc3x4567 = _mm512_add_epi32(vacc3x4567, _mm512_madd_epi16(va3, vb4567)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
131 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
132 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_4x16c8__avx512skx()
[all …]
D3x16c8-minmax-avx512skx.c105 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
106 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
107 vacc2x0123 = _mm512_add_epi32(vacc2x0123, _mm512_madd_epi16(va2, vb0123)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
110 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
111 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
112 vacc2x4567 = _mm512_add_epi32(vacc2x4567, _mm512_madd_epi16(va2, vb4567)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
115 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
116 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
117 vacc2x89AB = _mm512_add_epi32(vacc2x89AB, _mm512_madd_epi16(va2, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
120 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_3x16c8__avx512skx()
[all …]
D2x16c8-minmax-avx512skx.c91 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
92 vacc1x0123 = _mm512_add_epi32(vacc1x0123, _mm512_madd_epi16(va1, vb0123)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
95 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
96 vacc1x4567 = _mm512_add_epi32(vacc1x4567, _mm512_madd_epi16(va1, vb4567)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
99 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
100 vacc1x89AB = _mm512_add_epi32(vacc1x89AB, _mm512_madd_epi16(va1, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
103 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
104 vacc1xCDEF = _mm512_add_epi32(vacc1xCDEF, _mm512_madd_epi16(va1, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
112 …const __m512i vacc0x04152637 = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x0123, vacc0x4567), _mm… in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
113 …const __m512i vacc0x8C9DAEBF = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x89AB, vacc0xCDEF), _mm… in xnn_qs8_igemm_minmax_ukernel_2x16c8__avx512skx()
[all …]
D1x16c8-minmax-avx512skx.c77 vacc0x0123 = _mm512_add_epi32(vacc0x0123, _mm512_madd_epi16(va0, vb0123)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
80 vacc0x4567 = _mm512_add_epi32(vacc0x4567, _mm512_madd_epi16(va0, vb4567)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
83 vacc0x89AB = _mm512_add_epi32(vacc0x89AB, _mm512_madd_epi16(va0, vb89AB)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
86 vacc0xCDEF = _mm512_add_epi32(vacc0xCDEF, _mm512_madd_epi16(va0, vbCDEF)); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
94 …const __m512i vacc0x04152637 = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x0123, vacc0x4567), _mm… in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
95 …const __m512i vacc0x8C9DAEBF = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x89AB, vacc0xCDEF), _mm… in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
97 …__m512i vacc0x084C195D2A6E3B7F = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc0x04152637, vacc0x8C9D… in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
111_mm512_add_epi32(_mm512_and_si512(vq31prod0x084C195D2A6E3B7F, vremainder_mask), _mm512_srai_epi32(… in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
/external/XNNPACK/src/qs8-dwconv/gen/
Dup32x9-minmax-avx512skx-mul32.c105 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi0x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
106 …vaccGHIJKLMNOPQRSTUV = _mm512_add_epi32(vaccGHIJKLMNOPQRSTUV, _mm512_mullo_epi32(vi0xGHIJKLMNOPQRS… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
114 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi1x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
115 …vaccGHIJKLMNOPQRSTUV = _mm512_add_epi32(vaccGHIJKLMNOPQRSTUV, _mm512_mullo_epi32(vi1xGHIJKLMNOPQRS… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
123 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi2x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
124 …vaccGHIJKLMNOPQRSTUV = _mm512_add_epi32(vaccGHIJKLMNOPQRSTUV, _mm512_mullo_epi32(vi2xGHIJKLMNOPQRS… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
132 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi3x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
133 …vaccGHIJKLMNOPQRSTUV = _mm512_add_epi32(vaccGHIJKLMNOPQRSTUV, _mm512_mullo_epi32(vi3xGHIJKLMNOPQRS… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
141 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi4x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
142 …vaccGHIJKLMNOPQRSTUV = _mm512_add_epi32(vaccGHIJKLMNOPQRSTUV, _mm512_mullo_epi32(vi4xGHIJKLMNOPQRS… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
[all …]
Dup16x9-minmax-avx512skx-mul32.c101 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi0x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
107 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi1x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
113 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi2x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
119 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi3x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
125 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi4x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
131 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi5x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
137 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi6x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
143 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi7x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
149 …vacc0123456789ABCDEF = _mm512_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi8x0123456789ABC… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
164_mm512_add_epi32(_mm512_and_epi32(vq31prod0123456789ABCDEF, vremainder_mask), _mm512_srai_epi32(vq… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
[all …]
/external/libvpx/libvpx/vpx_dsp/x86/
Dsad4d_avx512.c44 sum_ref0 = _mm512_add_epi32(sum_ref0, ref0_reg); in vpx_sad64x64x4d_avx512()
45 sum_ref1 = _mm512_add_epi32(sum_ref1, ref1_reg); in vpx_sad64x64x4d_avx512()
46 sum_ref2 = _mm512_add_epi32(sum_ref2, ref2_reg); in vpx_sad64x64x4d_avx512()
47 sum_ref3 = _mm512_add_epi32(sum_ref3, ref3_reg); in vpx_sad64x64x4d_avx512()
73 sum_mlow = _mm512_add_epi32(sum_mlow, sum_mhigh); in vpx_sad64x64x4d_avx512()
/external/ruy/ruy/
Dkernel_avx512.cc165 initial_accum_data = _mm512_add_epi32(
238 accum_data_v0 = _mm512_add_epi32(
241 accum_data_v1 = _mm512_add_epi32(
244 accum_data_v2 = _mm512_add_epi32(
247 accum_data_v3 = _mm512_add_epi32(
250 accum_data_v4 = _mm512_add_epi32(
253 accum_data_v5 = _mm512_add_epi32(
256 accum_data_v6 = _mm512_add_epi32(
259 accum_data_v7 = _mm512_add_epi32(
262 accum_data_v8 = _mm512_add_epi32(
[all …]
/external/XNNPACK/src/f32-velu/gen/
Dvelu-avx512f-rr1-lut16-p3-perm-x128.c88 __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
90 __m512 vs1 = _mm512_castsi512_ps(_mm512_add_epi32(vl1, ven1)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
92 __m512 vs2 = _mm512_castsi512_ps(_mm512_add_epi32(vl2, ven2)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
94 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
96 __m512 vs4 = _mm512_castsi512_ps(_mm512_add_epi32(vl4, ven4)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
98 __m512 vs5 = _mm512_castsi512_ps(_mm512_add_epi32(vl5, ven5)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
100 __m512 vs6 = _mm512_castsi512_ps(_mm512_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
102 __m512 vs7 = _mm512_castsi512_ps(_mm512_add_epi32(vl7, ven7)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
205 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
238 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x128()
Dvelu-avx512f-rr1-lut16-p3-perm-x112.c83 __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
85 __m512 vs1 = _mm512_castsi512_ps(_mm512_add_epi32(vl1, ven1)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
87 __m512 vs2 = _mm512_castsi512_ps(_mm512_add_epi32(vl2, ven2)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
89 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
91 __m512 vs4 = _mm512_castsi512_ps(_mm512_add_epi32(vl4, ven4)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
93 __m512 vs5 = _mm512_castsi512_ps(_mm512_add_epi32(vl5, ven5)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
95 __m512 vs6 = _mm512_castsi512_ps(_mm512_add_epi32(vl6, ven6)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
188 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
221 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x112()
Dvelu-avx512f-rr1-lut16-p3-perm-x80.c73 __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
75 __m512 vs1 = _mm512_castsi512_ps(_mm512_add_epi32(vl1, ven1)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
77 __m512 vs2 = _mm512_castsi512_ps(_mm512_add_epi32(vl2, ven2)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
79 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
81 __m512 vs4 = _mm512_castsi512_ps(_mm512_add_epi32(vl4, ven4)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
154 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
187 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x80()
Dvelu-avx512f-rr1-lut16-p3-perm-x96.c78 __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
80 __m512 vs1 = _mm512_castsi512_ps(_mm512_add_epi32(vl1, ven1)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
82 __m512 vs2 = _mm512_castsi512_ps(_mm512_add_epi32(vl2, ven2)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
84 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
86 __m512 vs4 = _mm512_castsi512_ps(_mm512_add_epi32(vl4, ven4)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
88 __m512 vs5 = _mm512_castsi512_ps(_mm512_add_epi32(vl5, ven5)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
171 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
204 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x96()
Dvelu-avx512f-rr1-lut16-p3-perm-x64.c68 __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
70 __m512 vs1 = _mm512_castsi512_ps(_mm512_add_epi32(vl1, ven1)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
72 __m512 vs2 = _mm512_castsi512_ps(_mm512_add_epi32(vl2, ven2)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
74 __m512 vs3 = _mm512_castsi512_ps(_mm512_add_epi32(vl3, ven3)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
137 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
170 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64()
Dvelu-avx512f-rr1-lut16-p3-perm-x48.c63 __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48()
65 __m512 vs1 = _mm512_castsi512_ps(_mm512_add_epi32(vl1, ven1)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48()
67 __m512 vs2 = _mm512_castsi512_ps(_mm512_add_epi32(vl2, ven2)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48()
120 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48()
153 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48()
Dvelu-avx512f-rr1-lut16-p3-perm-x32.c58 __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32()
60 __m512 vs1 = _mm512_castsi512_ps(_mm512_add_epi32(vl1, ven1)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32()
103 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32()
136 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32()
Dvelu-avx512f-rr1-lut16-p3-perm-x16.c52 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
85 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x16()
/external/XNNPACK/src/math/
Dexp-avx512f-rr2-p5.c63 const __m512 vsn = _mm512_castsi512_ps(_mm512_add_epi32(ven, vdefault_exponent)); in xnn_math_f32_exp__avx512f_rr2_p5()
64 const __m512 vso = _mm512_castsi512_ps(_mm512_add_epi32(veo, vdefault_exponent)); in xnn_math_f32_exp__avx512f_rr2_p5()
Dexpm1minus-avx512f-rr1-lut16-p3-perm.c73 const __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven)); in xnn_math_f32_expm1minus__avx512f_rr1_lut16_p3_perm()
/external/XNNPACK/src/qs8-gemm/
DMRx16c8-avx512skx.c.in105 …vacc${M}x${ABC[N:N+4]} = _mm512_add_epi32(vacc${M}x${ABC[N:N+4]}, _mm512_madd_epi16(va${M}, vb${AB…
115 …const __m512i vacc${M}x04152637 = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc${M}x0123, vacc${M}x4…
116 …const __m512i vacc${M}x8C9DAEBF = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc${M}x89AB, vacc${M}xC…
119 …__m512i vacc${M}x084C195D2A6E3B7F = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc${M}x04152637, vacc…
139_mm512_add_epi32(_mm512_and_si512(vq31prod${M}x084C195D2A6E3B7F, vremainder_mask), _mm512_srai_epi…
/external/XNNPACK/src/qs8-igemm/
DMRx16c8-avx512skx.c.in111 …vacc${M}x${ABC[N:N+4]} = _mm512_add_epi32(vacc${M}x${ABC[N:N+4]}, _mm512_madd_epi16(va${M}, vb${AB…
123 …const __m512i vacc${M}x04152637 = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc${M}x0123, vacc${M}x4…
124 …const __m512i vacc${M}x8C9DAEBF = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc${M}x89AB, vacc${M}xC…
127 …__m512i vacc${M}x084C195D2A6E3B7F = _mm512_add_epi32(_mm512_unpacklo_epi32(vacc${M}x04152637, vacc…
147_mm512_add_epi32(_mm512_and_si512(vq31prod${M}x084C195D2A6E3B7F, vremainder_mask), _mm512_srai_epi…
/external/XNNPACK/src/f32-velu/
Davx512f-rr1-lut16-p3-perm.c.in59 __m512 vs${N} = _mm512_castsi512_ps(_mm512_add_epi32(vl${N}, ven${N}));
101 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven));
134 __m512 vs = _mm512_castsi512_ps(_mm512_add_epi32(vl, ven));

12