/external/XNNPACK/src/f32-dwconv/gen/ |
D | up32x4-minmax-avx512f.c | 96 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() local 137 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() local 170 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() local
|
D | up32x4-minmax-avx512f-acc2.c | 96 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() local 140 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() local 175 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() local
|
D | up16x4-minmax-avx512f.c | 85 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f() local 118 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f() local
|
D | up16x4-minmax-avx512f-acc2.c | 85 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2() local 120 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2() local
|
D | up32x9-minmax-avx512f.c | 121 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 207 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 270 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local
|
D | up32x9-minmax-avx512f-acc2.c | 121 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 210 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 275 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local
|
D | up16x9-minmax-avx512f-acc2.c | 110 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() local 175 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() local
|
D | up16x9-minmax-avx512f.c | 110 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() local 173 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() local
|
D | up16x25-minmax-avx512f-acc2.c | 190 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() local 351 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() local
|
D | up16x25-minmax-avx512f.c | 190 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() local 349 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 64); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() local
|
D | up32x25-minmax-avx512f.c | 201 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 431 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 590 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local
|
D | up32x25-minmax-avx512f-acc2.c | 201 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 434 const __m512 vk3x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 595 const __m512 vk3x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up16x9-minmax-avx512skx-mul32.c | 116 …const __m512i vk3x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() local 205 …const __m512i vk3x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() local
|
D | up32x9-minmax-avx512skx-mul32.c | 127 …const __m512i vk3x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() local 255 …const __m512i vk3x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) (k + 96… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() local
|
D | up16x9-minmax-avx2-mul16.c | 115 …const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() local 245 …const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() local
|
D | up32x9-minmax-avx2-mul16.c | 135 …const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() local 329 …const __m256i vk3x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 96… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() local
|