/external/XNNPACK/src/f16-gemm/gen/ |
D | 8x8-minmax-neonfp16arith-ld64.c | 94 float16x8_t vacc7x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() local 117 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 135 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 147 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c1, va7, 1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 165 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 177 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 195 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 207 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 225 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() 250 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7, vb01234567); in xnn_f16_gemm_minmax_ukernel_8x8__neonfp16arith_ld64() [all …]
|
D | 8x16-minmax-neonfp16arith-ld64.c | 101 float16x8_t vacc7x01234567 = vacc0x01234567; in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() local 126 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 152 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c0, vb01234567c0); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 173 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c1, va7, 1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 199 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c1, vb01234567c1); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 220 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 246 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c2, vb01234567c2); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 267 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 293 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c3, vb01234567c3); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() 327 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7, vb01234567); in xnn_f16_gemm_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f16-igemm/gen/ |
D | 8x8-minmax-neonfp16arith-ld64.c | 82 float16x8_t vacc7x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() local 149 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 167 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 179 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c1, va7, 1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 197 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 209 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 227 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 239 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 257 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() 280 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7, vb01234567); in xnn_f16_igemm_minmax_ukernel_8x8__neonfp16arith_ld64() [all …]
|
D | 8x16-minmax-neonfp16arith-ld64.c | 89 float16x8_t vacc7x01234567 = vacc0x01234567; in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() local 158 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 184 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c0, vb01234567c0); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 205 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c1, va7, 1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 231 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c1, vb01234567c1); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 252 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 278 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c2, vb01234567c2); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 299 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 325 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c3, vb01234567c3); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() 357 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7, vb01234567); in xnn_f16_igemm_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f16-gemm/gen-inc/ |
D | 8x8inc-minmax-neonfp16arith-ld64.c | 96 …float16x8_t vacc7x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() local 119 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 137 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 149 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c1, va7, 1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 167 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 179 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 197 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 209 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 227 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() 252 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7, vb01234567); in xnn_f16_gemminc_minmax_ukernel_8x8__neonfp16arith_ld64() [all …]
|
D | 8x16inc-minmax-neonfp16arith-ld64.c | 103 …float16x8_t vacc7x01234567 = vld1q_f16(acc); acc = (const void*) ((uintptr_t) acc + sizeof(float16… in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() local 128 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c0, va7, 0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 154 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c0, vb01234567c0); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 175 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c1, va7, 1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 201 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c1, vb01234567c1); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 222 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c2, va7, 2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 248 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c2, vb01234567c2); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 269 vacc7x01234567 = vfmaq_lane_f16(vacc7x01234567, vb01234567c3, va7, 3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 295 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7c3, vb01234567c3); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() 329 vacc7x01234567 = vfmaq_f16(vacc7x01234567, va7, vb01234567); in xnn_f16_gemminc_minmax_ukernel_8x16__neonfp16arith_ld64() [all …]
|
/external/XNNPACK/src/f32-igemm/gen/ |
D | 8x8-minmax-fma3-broadcast.c | 81 __m256 vacc7x01234567 = vacc0x01234567; in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() local 157 vacc7x01234567 = _mm256_fmadd_ps(va7, vb01234567, vacc7x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 171 vacc7x01234567 = _mm256_min_ps(vacc7x01234567, vmax); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 181 vacc7x01234567 = _mm256_max_ps(vacc7x01234567, vmin); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 184 _mm256_storeu_ps(c7, vacc7x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 204 __m128 vacc7x0123 = _mm256_castps256_ps128(vacc7x01234567); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast() 222 vacc7x0123 = _mm256_extractf128_ps(vacc7x01234567, 1); in xnn_f32_igemm_minmax_ukernel_8x8__fma3_broadcast()
|
/external/XNNPACK/src/f32-gemm/gen-inc/ |
D | 8x8inc-minmax-fma3-broadcast.c | 93 __m256 vacc7x01234567 = _mm256_load_ps(acc + 56); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() local 125 vacc7x01234567 = _mm256_fmadd_ps(va7, vb01234567, vacc7x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 138 vacc7x01234567 = _mm256_min_ps(vacc7x01234567, vmax); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 148 vacc7x01234567 = _mm256_max_ps(vacc7x01234567, vmin); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 151 _mm256_storeu_ps(c7, vacc7x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 179 __m128 vacc7x0123 = _mm256_castps256_ps128(vacc7x01234567); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast() 197 vacc7x0123 = _mm256_extractf128_ps(vacc7x01234567, 1); in xnn_f32_gemminc_minmax_ukernel_8x8__fma3_broadcast()
|
/external/XNNPACK/src/f32-gemm/gen/ |
D | 8x8-minmax-fma3-broadcast.c | 91 __m256 vacc7x01234567 = vacc0x01234567; in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() local 123 vacc7x01234567 = _mm256_fmadd_ps(va7, vb01234567, vacc7x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 136 vacc7x01234567 = _mm256_min_ps(vacc7x01234567, vmax); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 146 vacc7x01234567 = _mm256_max_ps(vacc7x01234567, vmin); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 149 _mm256_storeu_ps(c7, vacc7x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 177 __m128 vacc7x0123 = _mm256_castps256_ps128(vacc7x01234567); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast() 195 vacc7x0123 = _mm256_extractf128_ps(vacc7x01234567, 1); in xnn_f32_gemm_minmax_ukernel_8x8__fma3_broadcast()
|
/external/XNNPACK/src/qs8-gemm/gen/ |
D | 8x8c4-minmax-neondot.c | 260 …const int16x8_t vacc7x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc7x0123), vacc7x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local 265 … int8x16_t vout6x01234567_7x01234567 = vqmovn_high_s16(vqmovn_s16(vacc6x01234567), vacc7x01234567); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() 274 …const int16x8_t vacc7x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc7x0123), vqmovn_s32(vacc7x… in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot() local 279 …_t vout6x01234567_7x01234567 = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc7x01234567)); in xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 385 …const int16x8_t vacc7x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc7x0123), vacc7x4567), v… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 395 int8x16_t vout7x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc7x01234567), vacc7x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() 411 …const int16x8_t vacc7x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc7x0123), vqmovn_s32(vacc7x… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot() local 421 …int8x16_t vout7x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc7x01234567), vqmovn_s16(vacc7x89ABCD… in xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot()
|
/external/XNNPACK/src/qs8-igemm/gen/ |
D | 8x8c4-minmax-neondot.c | 284 …const int16x8_t vacc7x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc7x0123), vacc7x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local 289 … int8x16_t vout6x01234567_7x01234567 = vqmovn_high_s16(vqmovn_s16(vacc6x01234567), vacc7x01234567); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() 298 …const int16x8_t vacc7x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc7x0123), vqmovn_s32(vacc7x… in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot() local 303 …_t vout6x01234567_7x01234567 = vcombine_s8(vqmovn_s16(vacc6x01234567), vqmovn_s16(vacc7x01234567)); in xnn_qs8_igemm_minmax_ukernel_8x8c4__neondot()
|
D | 8x16c4-minmax-neondot.c | 409 …const int16x8_t vacc7x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc7x0123), vacc7x4567), v… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 419 int8x16_t vout7x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc7x01234567), vacc7x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() 435 …const int16x8_t vacc7x01234567 = vqaddq_s16(vcombine_s16(vqmovn_s32(vacc7x0123), vqmovn_s32(vacc7x… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot() local 445 …int8x16_t vout7x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc7x01234567), vqmovn_s16(vacc7x89ABCD… in xnn_qs8_igemm_minmax_ukernel_8x16c4__neondot()
|
/external/XNNPACK/src/qu8-gemm/ |
D | 8x8-minmax-neon.c | 514 …const int16x8_t vacc7x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc7x0123), vacc7x4567), v… in xnn_qu8_gemm_minmax_ukernel_8x8__neon() local 519 …nt8x16_t vout6x01234567_7x01234567 = vqmovun_high_s16(vqmovun_s16(vacc6x01234567), vacc7x01234567); in xnn_qu8_gemm_minmax_ukernel_8x8__neon() 535 const int16x8_t vacc7x01234567 = in xnn_qu8_gemm_minmax_ukernel_8x8__neon() local 541 … vout6x01234567_7x01234567 = vcombine_u8(vqmovun_s16(vacc6x01234567), vqmovun_s16(vacc7x01234567)); in xnn_qu8_gemm_minmax_ukernel_8x8__neon()
|
/external/XNNPACK/src/qu8-igemm/ |
D | 8x8-minmax-neon.c | 559 …const int16x8_t vacc7x01234567 = vqaddq_s16(vqmovn_high_s32(vqmovn_s32(vacc7x0123), vacc7x4567), v… in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local 564 …nt8x16_t vout6x01234567_7x01234567 = vqmovun_high_s16(vqmovun_s16(vacc6x01234567), vacc7x01234567); in xnn_qu8_igemm_minmax_ukernel_8x8__neon() 580 const int16x8_t vacc7x01234567 = in xnn_qu8_igemm_minmax_ukernel_8x8__neon() local 586 … vout6x01234567_7x01234567 = vcombine_u8(vqmovun_s16(vacc6x01234567), vqmovun_s16(vacc7x01234567)); in xnn_qu8_igemm_minmax_ukernel_8x8__neon()
|