• Home
  • Raw
  • Download

Lines Matching refs:__m256

34   const __m256 vmax = _mm256_broadcast_ps((const __m128*) params->sse.max);  in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
35 const __m256 vmin = _mm256_broadcast_ps((const __m128*) params->sse.min); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
167 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
168 __m256 vacc89ABCDEFp0 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
171 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
172 const __m256 vi0x89ABCDEF = _mm256_loadu_ps(i0 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
175 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
176 const __m256 vk0x89ABCDEF = _mm256_load_ps(w + 24); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
180 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
181 const __m256 vi1x89ABCDEF = _mm256_loadu_ps(i1 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
184 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
185 const __m256 vk1x89ABCDEF = _mm256_load_ps(w + 40); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
189 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
190 const __m256 vi2x89ABCDEF = _mm256_loadu_ps(i2 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
193 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
194 const __m256 vk2x89ABCDEF = _mm256_load_ps(w + 56); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
198 const __m256 vi3x01234567 = _mm256_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
199 const __m256 vi3x89ABCDEF = _mm256_loadu_ps(i3 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
202 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
203 const __m256 vk3x89ABCDEF = _mm256_load_ps(w + 72); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
207 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
208 const __m256 vi4x89ABCDEF = _mm256_loadu_ps(i4 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
211 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
212 const __m256 vk4x89ABCDEF = _mm256_load_ps(w + 88); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
216 const __m256 vi5x01234567 = _mm256_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
217 const __m256 vi5x89ABCDEF = _mm256_loadu_ps(i5 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
220 const __m256 vk5x01234567 = _mm256_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
221 const __m256 vk5x89ABCDEF = _mm256_load_ps(w + 104); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
225 const __m256 vi6x01234567 = _mm256_loadu_ps(i6); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
226 const __m256 vi6x89ABCDEF = _mm256_loadu_ps(i6 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
229 const __m256 vk6x01234567 = _mm256_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
230 const __m256 vk6x89ABCDEF = _mm256_load_ps(w + 120); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
234 const __m256 vi7x01234567 = _mm256_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
235 const __m256 vi7x89ABCDEF = _mm256_loadu_ps(i7 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
238 const __m256 vk7x01234567 = _mm256_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
239 const __m256 vk7x89ABCDEF = _mm256_load_ps(w + 136); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
243 const __m256 vi8x01234567 = _mm256_loadu_ps(i8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
244 const __m256 vi8x89ABCDEF = _mm256_loadu_ps(i8 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
247 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
248 const __m256 vk8x89ABCDEF = _mm256_load_ps(w + 152); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
252 const __m256 vi9x01234567 = _mm256_loadu_ps(i9); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
253 const __m256 vi9x89ABCDEF = _mm256_loadu_ps(i9 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
256 const __m256 vk9x01234567 = _mm256_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
257 const __m256 vk9x89ABCDEF = _mm256_load_ps(w + 168); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
261 const __m256 vi10x01234567 = _mm256_loadu_ps(i10); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
262 const __m256 vi10x89ABCDEF = _mm256_loadu_ps(i10 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
265 const __m256 vk10x01234567 = _mm256_load_ps(w + 176); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
266 const __m256 vk10x89ABCDEF = _mm256_load_ps(w + 184); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
270 const __m256 vi11x01234567 = _mm256_loadu_ps(i11); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
271 const __m256 vi11x89ABCDEF = _mm256_loadu_ps(i11 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
274 const __m256 vk11x01234567 = _mm256_load_ps(w + 192); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
275 const __m256 vk11x89ABCDEF = _mm256_load_ps(w + 200); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
279 const __m256 vi12x01234567 = _mm256_loadu_ps(i12); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
280 const __m256 vi12x89ABCDEF = _mm256_loadu_ps(i12 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
283 const __m256 vk12x01234567 = _mm256_load_ps(w + 208); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
284 const __m256 vk12x89ABCDEF = _mm256_load_ps(w + 216); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
288 const __m256 vi13x01234567 = _mm256_loadu_ps(i13); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
289 const __m256 vi13x89ABCDEF = _mm256_loadu_ps(i13 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
292 const __m256 vk13x01234567 = _mm256_load_ps(w + 224); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
293 const __m256 vk13x89ABCDEF = _mm256_load_ps(w + 232); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
297 const __m256 vi14x01234567 = _mm256_loadu_ps(i14); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
298 const __m256 vi14x89ABCDEF = _mm256_loadu_ps(i14 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
301 const __m256 vk14x01234567 = _mm256_load_ps(w + 240); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
302 const __m256 vk14x89ABCDEF = _mm256_load_ps(w + 248); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
306 const __m256 vi15x01234567 = _mm256_loadu_ps(i15); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
307 const __m256 vi15x89ABCDEF = _mm256_loadu_ps(i15 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
310 const __m256 vk15x01234567 = _mm256_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
311 const __m256 vk15x89ABCDEF = _mm256_load_ps(w + 264); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
315 const __m256 vi16x01234567 = _mm256_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
316 const __m256 vi16x89ABCDEF = _mm256_loadu_ps(i16 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
319 const __m256 vk16x01234567 = _mm256_load_ps(w + 272); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
320 const __m256 vk16x89ABCDEF = _mm256_load_ps(w + 280); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
324 const __m256 vi17x01234567 = _mm256_loadu_ps(i17); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
325 const __m256 vi17x89ABCDEF = _mm256_loadu_ps(i17 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
328 const __m256 vk17x01234567 = _mm256_load_ps(w + 288); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
329 const __m256 vk17x89ABCDEF = _mm256_load_ps(w + 296); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
333 const __m256 vi18x01234567 = _mm256_loadu_ps(i18); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
334 const __m256 vi18x89ABCDEF = _mm256_loadu_ps(i18 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
337 const __m256 vk18x01234567 = _mm256_load_ps(w + 304); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
338 const __m256 vk18x89ABCDEF = _mm256_load_ps(w + 312); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
342 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
343 const __m256 vi19x89ABCDEF = _mm256_loadu_ps(i19 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
346 const __m256 vk19x01234567 = _mm256_load_ps(w + 320); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
347 const __m256 vk19x89ABCDEF = _mm256_load_ps(w + 328); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
351 const __m256 vi20x01234567 = _mm256_loadu_ps(i20); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
352 const __m256 vi20x89ABCDEF = _mm256_loadu_ps(i20 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
355 const __m256 vk20x01234567 = _mm256_load_ps(w + 336); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
356 const __m256 vk20x89ABCDEF = _mm256_load_ps(w + 344); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
360 const __m256 vi21x01234567 = _mm256_loadu_ps(i21); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
361 const __m256 vi21x89ABCDEF = _mm256_loadu_ps(i21 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
364 const __m256 vk21x01234567 = _mm256_load_ps(w + 352); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
365 const __m256 vk21x89ABCDEF = _mm256_load_ps(w + 360); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
369 const __m256 vi22x01234567 = _mm256_loadu_ps(i22); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
370 const __m256 vi22x89ABCDEF = _mm256_loadu_ps(i22 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
373 const __m256 vk22x01234567 = _mm256_load_ps(w + 368); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
374 const __m256 vk22x89ABCDEF = _mm256_load_ps(w + 376); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
378 const __m256 vi23x01234567 = _mm256_loadu_ps(i23); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
379 const __m256 vi23x89ABCDEF = _mm256_loadu_ps(i23 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
382 const __m256 vk23x01234567 = _mm256_load_ps(w + 384); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
383 const __m256 vk23x89ABCDEF = _mm256_load_ps(w + 392); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
387 const __m256 vi24x01234567 = _mm256_loadu_ps(i24); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
388 const __m256 vi24x89ABCDEF = _mm256_loadu_ps(i24 + 8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
391 const __m256 vk24x01234567 = _mm256_load_ps(w + 400); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
392 const __m256 vk24x89ABCDEF = _mm256_load_ps(w + 408); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
399 __m256 vacc01234567 = _mm256_max_ps(vacc01234567p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
400 __m256 vacc89ABCDEF = _mm256_max_ps(vacc89ABCDEFp0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
409 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
411 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
414 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
417 const __m256 vi1x01234567 = _mm256_loadu_ps(i1); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
420 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
423 const __m256 vi2x01234567 = _mm256_loadu_ps(i2); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
426 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
429 const __m256 vi3x01234567 = _mm256_loadu_ps(i3); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
432 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
435 const __m256 vi4x01234567 = _mm256_loadu_ps(i4); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
438 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
441 const __m256 vi5x01234567 = _mm256_loadu_ps(i5); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
444 const __m256 vk5x01234567 = _mm256_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
447 const __m256 vi6x01234567 = _mm256_loadu_ps(i6); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
450 const __m256 vk6x01234567 = _mm256_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
453 const __m256 vi7x01234567 = _mm256_loadu_ps(i7); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
456 const __m256 vk7x01234567 = _mm256_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
459 const __m256 vi8x01234567 = _mm256_loadu_ps(i8); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
462 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
465 const __m256 vi9x01234567 = _mm256_loadu_ps(i9); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
468 const __m256 vk9x01234567 = _mm256_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
471 const __m256 vi10x01234567 = _mm256_loadu_ps(i10); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
474 const __m256 vk10x01234567 = _mm256_load_ps(w + 176); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
477 const __m256 vi11x01234567 = _mm256_loadu_ps(i11); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
480 const __m256 vk11x01234567 = _mm256_load_ps(w + 192); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
483 const __m256 vi12x01234567 = _mm256_loadu_ps(i12); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
486 const __m256 vk12x01234567 = _mm256_load_ps(w + 208); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
489 const __m256 vi13x01234567 = _mm256_loadu_ps(i13); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
492 const __m256 vk13x01234567 = _mm256_load_ps(w + 224); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
495 const __m256 vi14x01234567 = _mm256_loadu_ps(i14); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
498 const __m256 vk14x01234567 = _mm256_load_ps(w + 240); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
501 const __m256 vi15x01234567 = _mm256_loadu_ps(i15); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
504 const __m256 vk15x01234567 = _mm256_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
507 const __m256 vi16x01234567 = _mm256_loadu_ps(i16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
510 const __m256 vk16x01234567 = _mm256_load_ps(w + 272); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
513 const __m256 vi17x01234567 = _mm256_loadu_ps(i17); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
516 const __m256 vk17x01234567 = _mm256_load_ps(w + 288); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
519 const __m256 vi18x01234567 = _mm256_loadu_ps(i18); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
522 const __m256 vk18x01234567 = _mm256_load_ps(w + 304); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
525 const __m256 vi19x01234567 = _mm256_loadu_ps(i19); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
528 const __m256 vk19x01234567 = _mm256_load_ps(w + 320); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
531 const __m256 vi20x01234567 = _mm256_loadu_ps(i20); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
534 const __m256 vk20x01234567 = _mm256_load_ps(w + 336); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
537 const __m256 vi21x01234567 = _mm256_loadu_ps(i21); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
540 const __m256 vk21x01234567 = _mm256_load_ps(w + 352); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
543 const __m256 vi22x01234567 = _mm256_loadu_ps(i22); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
546 const __m256 vk22x01234567 = _mm256_load_ps(w + 368); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
549 const __m256 vi23x01234567 = _mm256_loadu_ps(i23); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
552 const __m256 vk23x01234567 = _mm256_load_ps(w + 384); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
555 const __m256 vi24x01234567 = _mm256_loadu_ps(i24); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
558 const __m256 vk24x01234567 = _mm256_load_ps(w + 400); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
564 __m256 vacc01234567 = _mm256_max_ps(vacc01234567p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
575 __m256 vacc01234567p0 = _mm256_load_ps(w); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
577 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
578 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
581 const __m256 vi1x01234567 = _mm256_maskload_ps(i1, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
582 const __m256 vk1x01234567 = _mm256_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
585 const __m256 vi2x01234567 = _mm256_maskload_ps(i2, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
586 const __m256 vk2x01234567 = _mm256_load_ps(w + 48); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
589 const __m256 vi3x01234567 = _mm256_maskload_ps(i3, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
590 const __m256 vk3x01234567 = _mm256_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
593 const __m256 vi4x01234567 = _mm256_maskload_ps(i4, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
594 const __m256 vk4x01234567 = _mm256_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
597 const __m256 vi5x01234567 = _mm256_maskload_ps(i5, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
598 const __m256 vk5x01234567 = _mm256_load_ps(w + 96); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
601 const __m256 vi6x01234567 = _mm256_maskload_ps(i6, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
602 const __m256 vk6x01234567 = _mm256_load_ps(w + 112); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
605 const __m256 vi7x01234567 = _mm256_maskload_ps(i7, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
606 const __m256 vk7x01234567 = _mm256_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
609 const __m256 vi8x01234567 = _mm256_maskload_ps(i8, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
610 const __m256 vk8x01234567 = _mm256_load_ps(w + 144); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
613 const __m256 vi9x01234567 = _mm256_maskload_ps(i9, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
614 const __m256 vk9x01234567 = _mm256_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
617 const __m256 vi10x01234567 = _mm256_maskload_ps(i10, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
618 const __m256 vk10x01234567 = _mm256_load_ps(w + 176); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
621 const __m256 vi11x01234567 = _mm256_maskload_ps(i11, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
622 const __m256 vk11x01234567 = _mm256_load_ps(w + 192); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
625 const __m256 vi12x01234567 = _mm256_maskload_ps(i12, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
626 const __m256 vk12x01234567 = _mm256_load_ps(w + 208); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
629 const __m256 vi13x01234567 = _mm256_maskload_ps(i13, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
630 const __m256 vk13x01234567 = _mm256_load_ps(w + 224); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
633 const __m256 vi14x01234567 = _mm256_maskload_ps(i14, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
634 const __m256 vk14x01234567 = _mm256_load_ps(w + 240); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
637 const __m256 vi15x01234567 = _mm256_maskload_ps(i15, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
638 const __m256 vk15x01234567 = _mm256_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
641 const __m256 vi16x01234567 = _mm256_maskload_ps(i16, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
642 const __m256 vk16x01234567 = _mm256_load_ps(w + 272); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
645 const __m256 vi17x01234567 = _mm256_maskload_ps(i17, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
646 const __m256 vk17x01234567 = _mm256_load_ps(w + 288); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
649 const __m256 vi18x01234567 = _mm256_maskload_ps(i18, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
650 const __m256 vk18x01234567 = _mm256_load_ps(w + 304); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
653 const __m256 vi19x01234567 = _mm256_maskload_ps(i19, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
654 const __m256 vk19x01234567 = _mm256_load_ps(w + 320); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
657 const __m256 vi20x01234567 = _mm256_maskload_ps(i20, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
658 const __m256 vk20x01234567 = _mm256_load_ps(w + 336); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
661 const __m256 vi21x01234567 = _mm256_maskload_ps(i21, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
662 const __m256 vk21x01234567 = _mm256_load_ps(w + 352); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
665 const __m256 vi22x01234567 = _mm256_maskload_ps(i22, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
666 const __m256 vk22x01234567 = _mm256_load_ps(w + 368); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
669 const __m256 vi23x01234567 = _mm256_maskload_ps(i23, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
670 const __m256 vk23x01234567 = _mm256_load_ps(w + 384); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
673 const __m256 vi24x01234567 = _mm256_maskload_ps(i24, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
674 const __m256 vk24x01234567 = _mm256_load_ps(w + 400); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()
678 __m256 vacc01234567 = _mm256_max_ps(vacc01234567p0, vmin); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx()