/external/XNNPACK/src/f32-dwconv/gen/ |
D | up32x9-minmax-avx512f.c | 157 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 159 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 231 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 232 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 286 const __m512 vk7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 287 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f()
|
D | up32x9-minmax-avx512f-acc2.c | 157 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 159 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 234 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 235 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 291 const __m512 vk7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 292 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
|
D | up16x9-minmax-avx512f-acc2.c | 134 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() local 135 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 191 const __m512 vk7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() local 192 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2()
|
D | up16x9-minmax-avx512f.c | 134 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() local 135 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 189 const __m512 vk7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() local 190 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f()
|
D | up16x25-minmax-avx512f-acc2.c | 214 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() local 215 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 367 const __m512 vk7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() local 368 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2()
|
D | up16x25-minmax-avx512f.c | 214 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() local 215 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 365 const __m512 vk7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 128); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() local 366 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f()
|
D | up32x25-minmax-avx512f.c | 237 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 239 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 455 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 456 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 606 const __m512 vk7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 607 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
|
D | up32x25-minmax-avx512f-acc2.c | 237 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 239 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 458 const __m512 vk7x0123456789ABCDEF = _mm512_load_ps(w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 459 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 611 const __m512 vk7x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 256); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 612 …vacc0123456789ABCDEFp1 = _mm512_fmadd_ps(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up16x9-minmax-avx512skx-mul32.c | 140 …const __m512i vk7x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() local 143 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 225 …const __m512i vk7x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() local 227 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
|
D | up32x9-minmax-avx512skx-mul32.c | 163 …const __m512i vk7x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() local 168 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 279 …const __m512i vk7x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) (k + 22… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() local 282 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
|
D | up16x9-minmax-avx2-mul16.c | 151 …const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() local 154 … __m256i vprod7x0123456789ABCDEF = _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 277 …const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() local 279 …t __m256i vprod7x0123456789ABCDEF = _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
|
D | up32x9-minmax-avx2-mul16.c | 195 …const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() local 200 … __m256i vprod7x0123456789ABCDEF = _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 365 …const __m256i vk7x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 22… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() local 368 …t __m256i vprod7x0123456789ABCDEF = _mm256_mullo_epi16(vi7x0123456789ABCDEF, vk7x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16()
|