Lines Matching refs:__m512
33 const __m512 vmax = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.max)); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
34 const __m512 vmin = _mm512_broadcast_f32x4(_mm_load_ps(params->sse.min)); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
86 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
87 __m512 vaccGHIJKLMNOPQRSTUVp0 = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
90 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
91 const __m512 vi0xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i0 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
94 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
95 const __m512 vk0xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
99 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
100 const __m512 vi1xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i1 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
103 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
104 const __m512 vk1xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
105 __m512 vacc0123456789ABCDEFp1 = _mm512_mul_ps(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
106 __m512 vaccGHIJKLMNOPQRSTUVp1 = _mm512_mul_ps(vi1xGHIJKLMNOPQRSTUV, vk1xGHIJKLMNOPQRSTUV); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
108 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
109 const __m512 vi2xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i2 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
112 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
113 const __m512 vk2xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
117 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
118 const __m512 vi3xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i3 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
121 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
122 const __m512 vk3xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
126 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
127 const __m512 vi4xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i4 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
130 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
131 const __m512 vk4xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 176); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
135 const __m512 vi5x0123456789ABCDEF = _mm512_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
136 const __m512 vi5xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i5 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
139 const __m512 vk5x0123456789ABCDEF = _mm512_load_ps(w + 192); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
140 const __m512 vk5xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 208); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
144 const __m512 vi6x0123456789ABCDEF = _mm512_loadu_ps(i6); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
145 const __m512 vi6xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i6 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
148 const __m512 vk6x0123456789ABCDEF = _mm512_load_ps(w + 224); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
149 const __m512 vk6xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 240); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
153 const __m512 vi7x0123456789ABCDEF = _mm512_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
154 const __m512 vi7xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i7 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
157 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
158 const __m512 vk7xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 272); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
162 const __m512 vi8x0123456789ABCDEF = _mm512_loadu_ps(i8); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
163 const __m512 vi8xGHIJKLMNOPQRSTUV = _mm512_loadu_ps(i8 + 16); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
166 const __m512 vk8x0123456789ABCDEF = _mm512_load_ps(w + 288); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
167 const __m512 vk8xGHIJKLMNOPQRSTUV = _mm512_load_ps(w + 304); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
177 __m512 vacc0123456789ABCDEF = _mm512_max_ps(vacc0123456789ABCDEFp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
178 __m512 vaccGHIJKLMNOPQRSTUV = _mm512_max_ps(vaccGHIJKLMNOPQRSTUVp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
187 __m512 vacc0123456789ABCDEFp0 = _mm512_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
189 const __m512 vi0x0123456789ABCDEF = _mm512_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
192 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
195 const __m512 vi1x0123456789ABCDEF = _mm512_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
198 const __m512 vk1x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
199 __m512 vacc0123456789ABCDEFp1 = _mm512_mul_ps(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
201 const __m512 vi2x0123456789ABCDEF = _mm512_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
204 const __m512 vk2x0123456789ABCDEF = _mm512_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
207 const __m512 vi3x0123456789ABCDEF = _mm512_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
210 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
213 const __m512 vi4x0123456789ABCDEF = _mm512_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
216 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
219 const __m512 vi5x0123456789ABCDEF = _mm512_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
222 const __m512 vk5x0123456789ABCDEF = _mm512_load_ps(w + 192); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
225 const __m512 vi6x0123456789ABCDEF = _mm512_loadu_ps(i6); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
228 const __m512 vk6x0123456789ABCDEF = _mm512_load_ps(w + 224); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
231 const __m512 vi7x0123456789ABCDEF = _mm512_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
234 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
237 const __m512 vi8x0123456789ABCDEF = _mm512_loadu_ps(i8); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
240 const __m512 vk8x0123456789ABCDEF = _mm512_load_ps(w + 288); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
248 __m512 vacc0123456789ABCDEF = _mm512_max_ps(vacc0123456789ABCDEFp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
260 __m512 vacc0123456789ABCDEFp0 = _mm512_maskz_loadu_ps(vmask, w); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
262 const __m512 vi0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, i0); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
263 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
266 const __m512 vi1x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, i1); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
267 const __m512 vk1x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 64); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
268 __m512 vacc0123456789ABCDEFp1 = _mm512_mul_ps(vi1x0123456789ABCDEF, vk1x0123456789ABCDEF); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
270 const __m512 vi2x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, i2); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
271 const __m512 vk2x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 96); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
274 const __m512 vi3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, i3); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
275 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
278 const __m512 vi4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, i4); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
279 const __m512 vk4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
282 const __m512 vi5x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, i5); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
283 const __m512 vk5x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 192); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
286 const __m512 vi6x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, i6); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
287 const __m512 vk6x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 224); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
290 const __m512 vi7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, i7); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
291 const __m512 vk7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
294 const __m512 vi8x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, i8); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
295 const __m512 vk8x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 288); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
301 __m512 vacc0123456789ABCDEF = _mm512_max_ps(vacc0123456789ABCDEFp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()