/external/XNNPACK/src/f16-dwconv/gen/ |
D | up16x4-minmax-neonfp16arith.c | 68 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local 108 const float16x8_t vk0x01234567 = vld1q_f16(w + 8); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local 134 const float16x8_t vk0x01234567 = vld1q_f16(w + 16); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local
|
D | up16x4-minmax-neonfp16arith-acc2.c | 68 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local 111 const float16x8_t vk0x01234567 = vld1q_f16(w + 8); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local 139 const float16x8_t vk0x01234567 = vld1q_f16(w + 16); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local
|
D | up8x4-minmax-neonfp16arith.c | 66 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith() local 92 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith() local
|
D | up8x4-minmax-neonfp16arith-acc2.c | 66 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() local 94 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() local
|
D | up16x9-minmax-neonfp16arith-acc2.c | 93 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local 171 const float16x8_t vk0x01234567 = vld1q_f16(w + 8); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local 219 const float16x8_t vk0x01234567 = vld1q_f16(w + 16); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith_acc2() local
|
D | up16x9-minmax-neonfp16arith.c | 93 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local 168 const float16x8_t vk0x01234567 = vld1q_f16(w + 8); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local 214 const float16x8_t vk0x01234567 = vld1q_f16(w + 16); in xnn_f16_dwconv_minmax_ukernel_up16x9__neonfp16arith() local
|
D | up8x9-minmax-neonfp16arith.c | 91 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() local 137 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith() local
|
D | up8x9-minmax-neonfp16arith-acc2.c | 91 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local 139 const float16x8_t vk0x01234567 = vld1q_f16(w); w += 8; in xnn_f16_dwconv_minmax_ukernel_up8x9__neonfp16arith_acc2() local
|
/external/XNNPACK/src/f32-dwconv/gen/ |
D | up16x4-minmax-fma3.c | 70 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local 120 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local 158 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local
|
D | up16x4-minmax-avx.c | 70 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local 120 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local 158 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local
|
D | up16x4-minmax-fma3-acc2.c | 70 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local 123 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local 163 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local
|
D | up16x4-minmax-avx-acc2.c | 70 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local 123 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local 163 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local
|
D | up8x4-minmax-fma3.c | 68 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3() local 106 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3() local
|
D | up8x4-minmax-avx.c | 68 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx() local 106 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx() local
|
D | up8x4-minmax-avx-acc2.c | 68 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() local 108 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() local
|
D | up8x4-minmax-fma3-acc2.c | 68 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() local 108 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() local
|
D | up16x9-minmax-fma3.c | 95 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local 190 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local 258 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local
|
D | up16x9-minmax-avx-acc2.c | 95 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2() local 193 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2() local 263 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx_acc2() local
|
D | up16x9-minmax-fma3-acc2.c | 95 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local 193 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local 263 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3_acc2() local
|
D | up16x9-minmax-avx.c | 95 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local 190 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local 258 const __m256 vk0x01234567 = _mm256_load_ps(w + 16); in xnn_f32_dwconv_minmax_ukernel_up16x9__avx() local
|
D | up8x9-minmax-avx.c | 93 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx() local 161 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx() local
|
D | up8x9-minmax-fma3.c | 93 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3() local 161 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3() local
|
D | up8x9-minmax-fma3-acc2.c | 93 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local 163 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__fma3_acc2() local
|
D | up8x9-minmax-avx-acc2.c | 93 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local 163 const __m256 vk0x01234567 = _mm256_load_ps(w + 8); in xnn_f32_dwconv_minmax_ukernel_up8x9__avx_acc2() local
|
/external/XNNPACK/src/qs8-dwconv/gen/ |
D | up8x9-minmax-neon-mul16.c | 94 …const int16x8_t vk0x01234567 = vmovl_s8(vld1_s8(w)); w = (const void*) ((uintptr_t) w + 8 * sizeof… in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() local 178 const int16x8_t vk0x01234567 = vmovl_s8(vld1_s8(w)); in xnn_qs8_dwconv_minmax_ukernel_up8x9__neon_mul16() local
|