/external/XNNPACK/src/f16-prelu/gen/ |
D | neonfp16arith-2x16.c | 78 const float16x8_t vi0x01234567 = vld1q_f16(i0); in xnn_f16_prelu_ukernel__neonfp16arith_2x16() local 83 float16x8_t vacc0x01234567 = vmulq_f16(vi0x01234567, vw01234567); in xnn_f16_prelu_ukernel__neonfp16arith_2x16() 84 … const uint16x8_t vm0x01234567 = vcltq_s16(vreinterpretq_s16_f16(vi0x01234567), vmovq_n_s16(0)); in xnn_f16_prelu_ukernel__neonfp16arith_2x16() 88 vacc0x01234567 = vbslq_f16(vm0x01234567, vacc0x01234567, vi0x01234567); in xnn_f16_prelu_ukernel__neonfp16arith_2x16() 97 const float16x8_t vi0x01234567 = vld1q_f16(i0); in xnn_f16_prelu_ukernel__neonfp16arith_2x16() local 102 float16x8_t vacc0x01234567 = vmulq_f16(vi0x01234567, vw01234567); in xnn_f16_prelu_ukernel__neonfp16arith_2x16() 103 … const uint16x8_t vm0x01234567 = vcltq_s16(vreinterpretq_s16_f16(vi0x01234567), vmovq_n_s16(0)); in xnn_f16_prelu_ukernel__neonfp16arith_2x16() 107 vacc0x01234567 = vbslq_f16(vm0x01234567, vacc0x01234567, vi0x01234567); in xnn_f16_prelu_ukernel__neonfp16arith_2x16()
|
D | neonfp16arith-2x8.c | 67 const float16x8_t vi0x01234567 = vld1q_f16(i0); in xnn_f16_prelu_ukernel__neonfp16arith_2x8() local 72 float16x8_t vacc0x01234567 = vmulq_f16(vi0x01234567, vw01234567); in xnn_f16_prelu_ukernel__neonfp16arith_2x8() 73 … const uint16x8_t vm0x01234567 = vcltq_s16(vreinterpretq_s16_f16(vi0x01234567), vmovq_n_s16(0)); in xnn_f16_prelu_ukernel__neonfp16arith_2x8() 77 vacc0x01234567 = vbslq_f16(vm0x01234567, vacc0x01234567, vi0x01234567); in xnn_f16_prelu_ukernel__neonfp16arith_2x8()
|
/external/XNNPACK/src/f32-dwconv/gen/ |
D | up16x4-minmax-fma3.c | 66 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local 72 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() 117 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local 121 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() 157 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3() local 159 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3()
|
D | up16x4-minmax-fma3-acc2.c | 66 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local 72 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 120 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local 124 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() 162 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2() local 164 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x4__fma3_acc2()
|
D | up16x4-minmax-avx-acc2.c | 66 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local 72 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 120 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local 124 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() 162 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2() local 164 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx_acc2()
|
D | up16x4-minmax-avx.c | 66 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local 72 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() 117 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local 121 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() 157 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx() local 159 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up16x4__avx()
|
D | up8x4-minmax-fma3.c | 65 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3() local 69 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3() 105 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3() local 107 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3()
|
D | up8x4-minmax-avx.c | 65 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx() local 69 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx() 105 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx() local 107 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx()
|
D | up8x4-minmax-avx-acc2.c | 65 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() local 69 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() 107 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2() local 109 vacc01234567p0 = _mm256_add_ps(vacc01234567p0, _mm256_mul_ps(vi0x01234567, vk0x01234567)); in xnn_f32_dwconv_minmax_ukernel_up8x4__avx_acc2()
|
D | up8x4-minmax-fma3-acc2.c | 65 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() local 69 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() 107 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2() local 109 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up8x4__fma3_acc2()
|
D | up16x9-minmax-fma3.c | 91 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local 97 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 187 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local 191 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() 257 const __m256 vi0x01234567 = _mm256_maskload_ps(i0, vmask); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3() local 259 vacc01234567p0 = _mm256_fmadd_ps(vi0x01234567, vk0x01234567, vacc01234567p0); in xnn_f32_dwconv_minmax_ukernel_up16x9__fma3()
|
/external/XNNPACK/src/qs8-gavgpool/gen/ |
D | 7p7x-minmax-sse2-c8-acc2.c | 44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 59 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 98 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 113 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 171 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 186 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() 255 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2() local 270 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c8_acc2()
|
D | 7p7x-minmax-ssse3-c8-acc2.c | 44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 59 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 98 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 113 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 171 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 186 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() 252 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2() local 267 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-sse2-c24-acc2.c | 44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 73 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 137 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 152 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 194 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 223 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 287 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 302 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() 363 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() local 392 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__sse2_c24_acc2() [all …]
|
D | 7p7x-minmax-ssse3-c24-acc2.c | 44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 73 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 137 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 152 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 194 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 223 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 287 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 302 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() 363 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() local 392 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c24_acc2() [all …]
|
D | 7p7x-minmax-neon-c8-acc2.c | 44 const int8x8_t vi0x01234567 = vld1_s8(i0); i0 += 8; in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local 52 int16x8_t vacc0x01234567 = vaddl_s8(vi0x01234567, vi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() 81 const int8x8_t vi0x01234567 = vld1_s8(i0); i0 += 8; in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local 89 int16x8_t vacc0x01234567 = vaddl_s8(vi0x01234567, vi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() 146 const int8x8_t vi0x01234567 = vld1_s8(i0); i0 += 8; in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local 154 int16x8_t vacc0x01234567 = vaddl_s8(vi0x01234567, vi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() 226 const int8x8_t vi0x01234567 = vld1_s8(i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2() local 234 int16x8_t vacc0x01234567 = vaddl_s8(vi0x01234567, vi1x01234567); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__neon_c8_acc2()
|
D | 7x-minmax-ssse3-c8-acc2.c | 61 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 76 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() 141 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2() local 156 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7x__ssse3_c8_acc2()
|
D | 7p7x-minmax-ssse3-c16-acc2.c | 44 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 66 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 123 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 145 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 221 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 243 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() 347 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2() local 362 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7p7x__ssse3_c16_acc2()
|
D | 7x-minmax-sse2-c8-acc2.c | 61 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 76 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() 144 const __m128i vi0x01234567 = _mm_loadl_epi64((const __m128i*) i0); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2() local 159 …st __m128i vxi0x01234567 = _mm_unpacklo_epi8(vi0x01234567, _mm_cmpgt_epi8(_mm_setzero_si128(), vi0… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse2_c8_acc2()
|
/external/XNNPACK/src/f16-dwconv/gen/ |
D | up16x4-minmax-neonfp16arith.c | 66 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local 70 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 107 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local 109 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() 133 const float16x8_t vi0x01234567 = vld1q_f16(i0); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith() local 135 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith()
|
D | up16x4-minmax-neonfp16arith-acc2.c | 66 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local 70 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 110 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local 112 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() 138 const float16x8_t vi0x01234567 = vld1q_f16(i0); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2() local 140 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up16x4__neonfp16arith_acc2()
|
D | up8x4-minmax-neonfp16arith-acc2.c | 65 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() local 67 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() 93 const float16x8_t vi0x01234567 = vld1q_f16(i0); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2() local 95 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith_acc2()
|
D | up8x4-minmax-neonfp16arith.c | 65 const float16x8_t vi0x01234567 = vld1q_f16(i0); i0 += 8; in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith() local 67 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith() 91 const float16x8_t vi0x01234567 = vld1q_f16(i0); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith() local 93 vacc01234567p0 = vfmaq_f16(vacc01234567p0, vi0x01234567, vk0x01234567); in xnn_f16_dwconv_minmax_ukernel_up8x4__neonfp16arith()
|
/external/XNNPACK/src/f32-prelu/gen/ |
D | avx-2x8.c | 52 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_prelu_ukernel__avx_2x8() local 57 const __m256 vprod0x01234567 = _mm256_mul_ps(vi0x01234567, vw01234567); in xnn_f32_prelu_ukernel__avx_2x8() 60 const __m256 vacc0x01234567 = _mm256_blendv_ps(vi0x01234567, vprod0x01234567, vi0x01234567); in xnn_f32_prelu_ukernel__avx_2x8()
|
D | avx-2x16.c | 53 const __m256 vi0x01234567 = _mm256_loadu_ps(i0); in xnn_f32_prelu_ukernel__avx_2x16() local 60 const __m256 vprod0x01234567 = _mm256_mul_ps(vi0x01234567, vw01234567); in xnn_f32_prelu_ukernel__avx_2x16() 65 const __m256 vacc0x01234567 = _mm256_blendv_ps(vi0x01234567, vprod0x01234567, vi0x01234567); in xnn_f32_prelu_ukernel__avx_2x16()
|