/external/XNNPACK/src/f32-dwconv/gen/ |
D | up32x9-minmax-avx512f.c | 130 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 132 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 213 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 214 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 274 const __m512 vk4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 275 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f()
|
D | up32x9-minmax-avx512f-acc2.c | 130 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 132 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 216 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 217 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 279 const __m512 vk4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 280 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
|
D | up16x9-minmax-avx512f-acc2.c | 116 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() local 117 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 179 const __m512 vk4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() local 180 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2()
|
D | up16x9-minmax-avx512f.c | 116 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() local 117 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 177 const __m512 vk4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() local 178 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f()
|
D | up16x25-minmax-avx512f-acc2.c | 196 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() local 197 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 355 const __m512 vk4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() local 356 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2()
|
D | up16x25-minmax-avx512f.c | 196 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() local 197 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 353 const __m512 vk4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 80); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() local 354 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f()
|
D | up32x25-minmax-avx512f.c | 210 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 212 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 437 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 438 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 594 const __m512 vk4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 595 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
|
D | up32x25-minmax-avx512f-acc2.c | 210 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 212 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 440 const __m512 vk4x0123456789ABCDEF = _mm512_load_ps(w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 441 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 599 const __m512 vk4x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 160); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 600 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up16x9-minmax-avx512skx-mul32.c | 122 …const __m512i vk4x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() local 125 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 210 …const __m512i vk4x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() local 212 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
|
D | up32x9-minmax-avx512skx-mul32.c | 136 …const __m512i vk4x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() local 141 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 261 …const __m512i vk4x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) (k + 12… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() local 264 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
|
D | up16x9-minmax-avx2-mul16.c | 124 …const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() local 127 … __m256i vprod4x0123456789ABCDEF = _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 253 …const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() local 255 …t __m256i vprod4x0123456789ABCDEF = _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
|
D | up32x9-minmax-avx2-mul16.c | 150 …const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() local 155 … __m256i vprod4x0123456789ABCDEF = _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 338 …const __m256i vk4x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) (k + 12… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() local 341 …t __m256i vprod4x0123456789ABCDEF = _mm256_mullo_epi16(vi4x0123456789ABCDEF, vk4x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16()
|