/external/XNNPACK/src/f32-dwconv/gen/ |
D | up32x4-minmax-avx512f-acc2.c | 69 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() local 71 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 122 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() local 123 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() 163 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2() local 164 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f_acc2()
|
D | up32x4-minmax-avx512f.c | 69 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() local 71 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 119 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() local 120 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() 158 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f() local 159 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x4__avx512f()
|
D | up16x4-minmax-avx512f.c | 67 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f() local 68 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f() 106 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f() local 107 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f()
|
D | up16x4-minmax-avx512f-acc2.c | 67 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2() local 68 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2() 108 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2() local 109 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x4__avx512f_acc2()
|
D | up32x9-minmax-avx512f.c | 94 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 96 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 189 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 190 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() 258 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f() local 259 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f()
|
D | up32x9-minmax-avx512f-acc2.c | 94 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 96 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 192 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 193 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() 263 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2() local 264 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x9__avx512f_acc2()
|
D | up16x9-minmax-avx512f-acc2.c | 92 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() local 93 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() 163 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2() local 164 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f_acc2()
|
D | up16x9-minmax-avx512f.c | 92 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() local 93 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() 161 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f() local 162 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x9__avx512f()
|
D | up16x25-minmax-avx512f-acc2.c | 172 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() local 173 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() 339 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2() local 340 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f_acc2()
|
D | up16x25-minmax-avx512f.c | 172 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() local 173 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() 337 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f() local 338 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up16x25__avx512f()
|
D | up32x25-minmax-avx512f.c | 174 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 176 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 413 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 414 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() 578 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f() local 579 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f()
|
D | up32x25-minmax-avx512f-acc2.c | 174 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 176 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 416 const __m512 vk0x0123456789ABCDEF = _mm512_load_ps(w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 417 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() 583 const __m512 vk0x0123456789ABCDEF = _mm512_maskz_loadu_ps(vmask, w + 32); in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2() local 584 …vacc0123456789ABCDEFp0 = _mm512_fmadd_ps(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF, vacc012345678… in xnn_f32_dwconv_minmax_ukernel_up32x25__avx512f_acc2()
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up16x9-minmax-avx512skx-mul32.c | 98 …const __m512i vk0x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() local 101 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() 190 …const __m512i vk0x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32() local 192 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx512skx_mul32()
|
D | up32x9-minmax-avx512skx-mul32.c | 100 …const __m512i vk0x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_load_si128((const __m128i*) ((uintpt… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() local 105 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() 237 … const __m512i vk0x0123456789ABCDEF = _mm512_cvtepi8_epi32(_mm_loadu_si128((const __m128i*) k)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32() local 240 …12_add_epi32(vacc0123456789ABCDEF, _mm512_mullo_epi32(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx512skx_mul32()
|
D | up16x9-minmax-avx2-mul16.c | 88 …const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() local 91 … __m256i vprod0x0123456789ABCDEF = _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() 221 …const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16() local 223 …t __m256i vprod0x0123456789ABCDEF = _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up16x9__avx2_mul16()
|
D | up32x9-minmax-avx2-mul16.c | 90 …const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) ((uintp… in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() local 95 … __m256i vprod0x0123456789ABCDEF = _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() 302 … const __m256i vk0x0123456789ABCDEF = _mm256_cvtepi8_epi16(_mm_loadu_si128((const __m128i*) k)); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16() local 305 …t __m256i vprod0x0123456789ABCDEF = _mm256_mullo_epi16(vi0x0123456789ABCDEF, vk0x0123456789ABCDEF); in xnn_qs8_dwconv_minmax_ukernel_up32x9__avx2_mul16()
|